def calc_image_cell_size(vis_dataset, global_dataset, pixels_per_beam=7):
    """
    Calculates the image and cell size needed for imaging a vis_dataset.
    It uses the perfectly-illuminated circular aperture approximation to
    determine the field of view, and pixels_per_beam for the cell size.

    Parameters
    ----------
    vis_dataset : xarray.core.dataset.Dataset
        Input visibility dataset.
    global_dataset : xarray.core.dataset.Dataset
        Input global dataset (needed for antenna diameter).

    Returns
    -------
    cell : list of floats, units = arcseconds
        Cell size.
    imsize : list of ints
        Number of pixels for each spatial dimension.
    """
    import xarray
    import numpy as np
    import dask.array as da

    rad_to_arc = (3600 * 180) / np.pi  # Radians to arcseconds
    c = 299792458

    f_min = da.nanmin(vis_dataset.chan)
    f_max = da.nanmax(vis_dataset.chan)
    D_min = np.nanmin(global_dataset.ANT_DISH_DIAMETER)
    #D_min = min_dish_diameter

    # Calculate cell size using pixels_per_beam
    cell = rad_to_arc * np.array([
        c / (da.nanmax(vis_dataset.UVW[:, :, 0].data) * f_max),
        c / (da.nanmax(vis_dataset.UVW[:, :, 1].data) * f_max)
    ]) / pixels_per_beam

    # If cell sizes are within 20% of each other use the smaller cell size for both.
    if (cell[0] / cell[1] < 1.2) and (cell[1] / cell[0] < 1.2):
        cell[:] = np.min(cell)

    # Calculate imsize using the perfectly-illuminated circular aperture approximation
    FWHM_max = np.array((rad_to_arc * (1.02 * c / (D_min * f_min))))
    imsize = FWHM_max / cell

    # Find an image size that is (2^n)*10 when multiplied with the gridding padding and n is an integer
    padding = 1.2
    if imsize[0] < 1:
        imsize[0] = 1
    if imsize[1] < 1:
        imsize[1] = 1
    n_power = np.ceil(np.log2(imsize / 10))
    imsize = np.ceil(((2 ** n_power) * 10) / padding)

    return cell, imsize
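# A minimal, self-contained sketch (hypothetical values, not the datasets used
# above) of the lazy-scalar pattern calc_image_cell_size relies on:
# da.nanmin / da.nanmax return unevaluated dask scalars that mix freely with
# NumPy arithmetic and are only evaluated when computed.
import numpy as np
import dask.array as da

chan = da.from_array(np.array([1.4e9, np.nan, 1.5e9]), chunks=2)   # stand-in for vis_dataset.chan
uvw_max = da.from_array(np.array([1200.0, 900.0]), chunks=2)       # stand-in for per-axis max |UVW|

c = 299792458
rad_to_arc = (3600 * 180) / np.pi

f_max = da.nanmax(chan)                          # lazy scalar, NaN ignored
cell = rad_to_arc * c / (uvw_max * f_max) / 7    # still lazy
print(cell.compute())                            # evaluated only here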
def get_sample_from_bil_info(self, data, fill_value=None, output_shape=None):
    """Resample using pre-computed resampling LUTs."""
    del output_shape
    fill_value = _check_fill_value(fill_value, data.dtype)

    p_1, p_2, p_3, p_4 = self._slice_data(data, fill_value)
    s__, t__ = self.bilinear_s, self.bilinear_t

    res = (p_1 * (1 - s__) * (1 - t__) +
           p_2 * s__ * (1 - t__) +
           p_3 * (1 - s__) * t__ +
           p_4 * s__ * t__)

    epsilon = 1e-6
    data_min = da.nanmin(data) - epsilon
    data_max = da.nanmax(data) + epsilon

    idxs = (res > data_max) | (res < data_min)
    res = da.where(idxs, fill_value, res)
    res = da.where(np.isnan(res), fill_value, res)

    shp = self.target_geo_def.shape
    if data.ndim == 3:
        res = da.reshape(res, (res.shape[0], shp[0], shp[1]))
    else:
        res = da.reshape(res, (shp[0], shp[1]))

    # Add missing coordinates
    self._add_missing_coordinates(data)

    res = DataArray(res, dims=data.dims, coords=self.out_coords)

    return res
def _limit_output_values_to_input(self, data, res, fill_value):
    epsilon = 1e-6
    data_min = da.nanmin(data) - epsilon
    data_max = da.nanmax(data) + epsilon

    res = da.where(
        find_indices_outside_min_and_max(res, data_min, data_max),
        fill_value, res)

    return da.where(np.isnan(res), fill_value, res)
def _normalize_data_dask(data, pixel_max, c, th):
    min_val = da.nanmin(data)
    max_val = da.nanmax(data)
    out = da.map_blocks(_normalize_data_cpu, data, min_val, max_val,
                        pixel_max, c, th, meta=np.array(()))
    return out
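# A minimal sketch of the pattern used by _normalize_data_dask, assuming a
# hypothetical per-block kernel (_normalize_cpu) in place of _normalize_data_cpu:
# the lazily computed global extremes are passed straight into da.map_blocks so
# every block is scaled against the same range.
import numpy as np
import dask.array as da

def _normalize_cpu(block, min_val, max_val, pixel_max):
    # Scale block values into [0, pixel_max] using the global min/max.
    return (block - min_val) / (max_val - min_val) * pixel_max

data = da.from_array(np.array([[1.0, np.nan], [3.0, 5.0]]), chunks=1)
out = da.map_blocks(_normalize_cpu, data,
                    da.nanmin(data), da.nanmax(data), 255.0,
                    meta=np.array(()))
print(out.compute())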
def _calculate_summary_statistics(self):
    data = self._lazy_data()
    _raveled = data.ravel()
    _mean, _std, _min, _q1, _q2, _q3, _max = da.compute(
        da.nanmean(data),
        da.nanstd(data),
        da.nanmin(data),
        da.percentile(_raveled, [25, ]),
        da.percentile(_raveled, [50, ]),
        da.percentile(_raveled, [75, ]),
        da.nanmax(data),
    )
    return _mean, _std, _min, _q1, _q2, _q3, _max
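# A short sketch of why the reductions above are wrapped in a single da.compute
# call: the data is traversed once for all statistics instead of once per
# statistic. Note that da.percentile (unlike the nan* reductions) is not
# NaN-aware. Hypothetical array, not the lazy data used above.
import numpy as np
import dask.array as da

data = da.from_array(np.array([[1.0, np.nan, 3.0], [4.0, 5.0, 6.0]]),
                     chunks=(1, 3))
mean, minimum, maximum = da.compute(
    da.nanmean(data),
    da.nanmin(data),
    da.nanmax(data),
)
print(mean, minimum, maximum)   # 3.8 1.0 6.0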
def stadistics(self):
    headers = ["group", "mean", "std dev", "min", "25%", "50%", "75%", "max",
               "nonzero", "nonan", "unique", "dtype"]
    self.chunksize = Chunks.build_from_shape(self.shape, self.dtypes)
    table = []
    for group, (dtype, _) in self.dtypes.fields.items():
        values = dict()
        values["dtype"] = dtype
        values["group"] = group
        darray = self.data[group].da
        if dtype == np.dtype(float) or dtype == np.dtype(int):
            da_mean = da.around(darray.mean(), decimals=3)
            da_std = da.around(darray.std(), decimals=3)
            da_min = da.around(darray.min(), decimals=3)
            da_max = da.around(darray.max(), decimals=3)
            result = dask.compute([da_mean, da_std, da_min, da_max])[0]
            values["mean"] = result[0] if not np.isnan(result[0]) else da.around(da.nanmean(darray), decimals=3).compute()
            values["std dev"] = result[1] if not np.isnan(result[1]) else da.around(da.nanstd(darray), decimals=3).compute()
            values["min"] = result[2] if not np.isnan(result[2]) else da.around(da.nanmin(darray), decimals=3).compute()
            values["max"] = result[3] if not np.isnan(result[3]) else da.around(da.nanmax(darray), decimals=3).compute()
            if len(self.shape[group]) == 1:
                da_percentile = da.around(da.percentile(darray, [25, 50, 75]), decimals=3)
                result = da_percentile.compute()
                values["25%"] = result[0]
                values["50%"] = result[1]
                values["75%"] = result[2]
            else:
                values["25%"] = "-"
                values["50%"] = "-"
                values["75%"] = "-"
            values["nonzero"] = da.count_nonzero(darray).compute()
            values["nonan"] = da.count_nonzero(da.notnull(darray)).compute()
            values["unique"] = "-"
        else:
            values["mean"] = "-"
            values["std dev"] = "-"
            values["min"] = "-"
            values["max"] = "-"
            values["25%"] = "-"
            values["50%"] = "-"
            values["75%"] = "-"
            values["nonzero"] = "-"
            values["nonan"] = da.count_nonzero(da.notnull(darray)).compute()
            vunique = darray.to_dask_dataframe().fillna('').nunique().compute()
            values["unique"] = vunique

        row = []
        for column in headers:
            row.append(values[column])
        table.append(row)

    print("# rows {}".format(self.shape[0]))
    return tabulate(table, headers)
def _run_dask_numpy_equal_interval(data, k):
    max_data = da.nanmax(data)
    min_data = da.nanmin(data)
    width = (max_data - min_data) / k
    cuts = da.arange(min_data + width, max_data + width, width)
    l_cuts = cuts.shape[0]
    if l_cuts > k:
        # handle overshooting
        cuts = cuts[0:k]
    # work around to assign cuts[-1] = max_data
    bins = da.concatenate([cuts[:k - 1], [max_data]])
    out = _bin(data, bins, np.arange(l_cuts))
    return out
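# A minimal sketch of equal-interval classification with dask, assuming
# da.digitize in place of the module's private _bin helper. The bin edges are
# derived from the NaN-aware extremes, as in the function above.
import numpy as np
import dask.array as da

data = da.from_array(np.array([0.0, 2.5, np.nan, 7.5, 10.0]), chunks=2)
k = 4

lo, hi = da.compute(da.nanmin(data), da.nanmax(data))
edges = np.linspace(lo, hi, k + 1)[1:-1]   # k - 1 interior edges
labels = da.digitize(data, edges)
print(labels.compute())                    # [0 1 3 3 3]; NaN lands in the last bin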
def test_nan():
    x = np.array([[1, np.nan, 3, 4],
                  [5, 6, 7, np.nan],
                  [9, 10, 11, 12]])
    d = da.from_array(x, chunks=(2, 2))

    assert_eq(np.nansum(x), da.nansum(d))
    assert_eq(np.nansum(x, axis=0), da.nansum(d, axis=0))
    assert_eq(np.nanmean(x, axis=1), da.nanmean(d, axis=1))
    assert_eq(np.nanmin(x, axis=1), da.nanmin(d, axis=1))
    assert_eq(np.nanmax(x, axis=(0, 1)), da.nanmax(d, axis=(0, 1)))
    assert_eq(np.nanvar(x), da.nanvar(d))
    assert_eq(np.nanstd(x, axis=0), da.nanstd(d, axis=0))
    assert_eq(np.nanargmin(x, axis=0), da.nanargmin(d, axis=0))
    assert_eq(np.nanargmax(x, axis=0), da.nanargmax(d, axis=0))
    assert_eq(np.nanprod(x), da.nanprod(d))
def test_nan():
    x = np.array([[1, np.nan, 3, 4],
                  [5, 6, 7, np.nan],
                  [9, 10, 11, 12]])
    d = da.from_array(x, blockshape=(2, 2))

    assert eq(np.nansum(x), da.nansum(d))
    assert eq(np.nansum(x, axis=0), da.nansum(d, axis=0))
    assert eq(np.nanmean(x, axis=1), da.nanmean(d, axis=1))
    assert eq(np.nanmin(x, axis=1), da.nanmin(d, axis=1))
    assert eq(np.nanmax(x, axis=(0, 1)), da.nanmax(d, axis=(0, 1)))
    assert eq(np.nanvar(x), da.nanvar(d))
    assert eq(np.nanstd(x, axis=0), da.nanstd(d, axis=0))
    assert eq(np.nanargmin(x, axis=0), da.nanargmin(d, axis=0))
    assert eq(np.nanargmax(x, axis=0), da.nanargmax(d, axis=0))

    with ignoring(AttributeError):
        assert eq(np.nanprod(x), da.nanprod(d))
def _calculate_summary_statistics(self, rechunk=True):
    if rechunk is True:
        # Use dask auto rechunk instead of HyperSpy's one, which should be
        # better for these operations
        rechunk = "dask_auto"
    data = self._lazy_data(rechunk=rechunk)
    _raveled = data.ravel()
    _mean, _std, _min, _q1, _q2, _q3, _max = da.compute(
        da.nanmean(data),
        da.nanstd(data),
        da.nanmin(data),
        da.percentile(_raveled, [25, ]),
        da.percentile(_raveled, [50, ]),
        da.percentile(_raveled, [75, ]),
        da.nanmax(data),
    )
    return _mean, _std, _min, _q1, _q2, _q3, _max
def test_nan():
    x = np.array([[1, np.nan, 3, 4],
                  [5, 6, 7, np.nan],
                  [9, 10, 11, 12]])
    d = da.from_array(x, chunks=(2, 2))

    assert_eq(np.nansum(x), da.nansum(d))
    assert_eq(np.nansum(x, axis=0), da.nansum(d, axis=0))
    assert_eq(np.nanmean(x, axis=1), da.nanmean(d, axis=1))
    assert_eq(np.nanmin(x, axis=1), da.nanmin(d, axis=1))
    assert_eq(np.nanmax(x, axis=(0, 1)), da.nanmax(d, axis=(0, 1)))
    assert_eq(np.nanvar(x), da.nanvar(d))
    assert_eq(np.nanstd(x, axis=0), da.nanstd(d, axis=0))
    assert_eq(np.nanargmin(x, axis=0), da.nanargmin(d, axis=0))
    assert_eq(np.nanargmax(x, axis=0), da.nanargmax(d, axis=0))
    assert_eq(nanprod(x), da.nanprod(d))
def test_reductions():
    x = np.arange(5).astype('f4')
    a = da.from_array(x, chunks=(2,))

    assert eq(da.all(a), np.all(x))
    assert eq(da.any(a), np.any(x))
    assert eq(da.argmax(a, axis=0), np.argmax(x, axis=0))
    assert eq(da.argmin(a, axis=0), np.argmin(x, axis=0))
    assert eq(da.max(a), np.max(x))
    assert eq(da.mean(a), np.mean(x))
    assert eq(da.min(a), np.min(x))
    assert eq(da.nanargmax(a, axis=0), np.nanargmax(x, axis=0))
    assert eq(da.nanargmin(a, axis=0), np.nanargmin(x, axis=0))
    assert eq(da.nanmax(a), np.nanmax(x))
    assert eq(da.nanmin(a), np.nanmin(x))
    assert eq(da.nansum(a), np.nansum(x))
    assert eq(da.nanvar(a), np.nanvar(x))
    assert eq(da.nanstd(a), np.nanstd(x))
def test_reductions():
    x = np.arange(5).astype('f4')
    a = da.from_array(x, blockshape=(2, ))

    assert eq(da.all(a), np.all(x))
    assert eq(da.any(a), np.any(x))
    assert eq(da.argmax(a, axis=0), np.argmax(x, axis=0))
    assert eq(da.argmin(a, axis=0), np.argmin(x, axis=0))
    assert eq(da.max(a), np.max(x))
    assert eq(da.mean(a), np.mean(x))
    assert eq(da.min(a), np.min(x))
    assert eq(da.nanargmax(a, axis=0), np.nanargmax(x, axis=0))
    assert eq(da.nanargmin(a, axis=0), np.nanargmin(x, axis=0))
    assert eq(da.nanmax(a), np.nanmax(x))
    assert eq(da.nanmin(a), np.nanmin(x))
    assert eq(da.nansum(a), np.nansum(x))
    assert eq(da.nanvar(a), np.nanvar(x))
    assert eq(da.nanstd(a), np.nanstd(x))
def _concatenate_chunks(chunks):
    """Concatenate chunks to full output array."""
    # Form the full array
    col, res = [], []
    prev_y = 0
    for y, x in sorted(chunks):
        if len(chunks[(y, x)]) > 1:
            chunk = da.nanmax(da.stack(chunks[(y, x)], axis=-1), axis=-1)
        else:
            chunk = chunks[(y, x)][0]
        if y == prev_y:
            col.append(chunk)
            continue
        res.append(da.concatenate(col, axis=1))
        col = [chunk]
        prev_y = y
    res.append(da.concatenate(col, axis=1))
    res = da.concatenate(res, axis=2)

    return res
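# A tiny sketch, with made-up tiles, of the da.nanmax(da.stack(...), axis=-1)
# idiom used above to merge overlapping output chunks: NaN marks "no data",
# so any real value wins.
import numpy as np
import dask.array as da

tile_a = da.from_array(np.array([[1.0, np.nan], [np.nan, np.nan]]), chunks=2)
tile_b = da.from_array(np.array([[np.nan, 2.0], [np.nan, 4.0]]), chunks=2)

merged = da.nanmax(da.stack([tile_a, tile_b], axis=-1), axis=-1)
print(merged.compute())
# [[ 1.  2.]
#  [nan  4.]]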
def parallel_gradient_search(data, src_x, src_y, dst_x, dst_y, **kwargs):
    """Run gradient search in parallel in input area coordinates."""
    if data.ndim not in [2, 3]:
        raise NotImplementedError(
            'Gradient search resampling only supports 2D or 3D arrays.')
    if data.ndim == 2:
        data = data[np.newaxis, :, :]
    # TODO: Make sure the data is uniformly chunked.
    src_gradient_xl, src_gradient_xp = np.gradient(src_x, axis=[0, 1])
    src_gradient_yl, src_gradient_yp = np.gradient(src_y, axis=[0, 1])
    arrays = reshape_arrays_in_stacked_chunks(
        (src_x, src_y,
         src_gradient_xl, src_gradient_xp,
         src_gradient_yl, src_gradient_yp),
        src_x.chunks)
    # TODO: rechunk and reformat the data array
    src_x, src_y, src_gradient_xl, src_gradient_xp, src_gradient_yl, src_gradient_yp = arrays
    data = reshape_to_stacked_3d(data)
    res = da.blockwise(_gradient_resample_data, 'bmnz',
                       data.astype(np.float64), 'bijz',
                       src_x, 'ijz', src_y, 'ijz',
                       src_gradient_xl, 'ijz', src_gradient_xp, 'ijz',
                       src_gradient_yl, 'ijz', src_gradient_yp, 'ijz',
                       dst_x, 'mn', dst_y, 'mn',
                       dtype=np.float64,
                       method=kwargs.get('method', 'bilinear'))
    return da.nanmax(res, axis=-1).squeeze()
data_dask = da.from_array(data, chunks=(1, 444, 922))

# Yields an unevaluated dask array
data_dask.min()
# dask.array<amin-aggregate, shape=(), dtype=float64, chunksize=()>

# Force computation
data_dask.min().compute()
# nan
# No min(), because there are NaN values

# Dask has NaN-aware computations
da.nanmin(data_dask).compute()
# -22.329354809176536

lo = da.nanmin(data_dask).compute()
hi = da.nanmax(data_dask).compute()
print(lo, hi)
# -22.3293548092 47.7625806255

# Visualizing the temperature maps

# Number of images
N_months = data_dask.shape[0]

import matplotlib.pyplot as plt

fig, panels = plt.subplots(nrows=4, ncols=3)
for month, panel in zip(range(N_months), panels.flatten()):
    im = panel.imshow(data_dask[month, :, :],
                      origin='lower',
                      vmin=lo, vmax=hi)
    panel.set_title('2008-{:02d}'.format(month + 1))
    panel.axis('off')
fig, ax = plt.subplots(figsize=[10, 10], constrained_layout=True)
base_extent = np.array(
    [-dims[1] // 2, dims[1] // 2, -dims[2] // 2, dims[2] // 2])
ax.scatter(*cpc, c=cropindices, cmap='nipy_spectral', zorder=5,
           linewidths=1, edgecolors='black')

cfac = 4
coarse_mask = da.coarsen(np.all, da.asarray(mask), {0: cfac, 1: cfac})
cropdata = da.coarsen(np.mean, data[cropindices], {1: cfac, 2: cfac}).persist()
xlim, ylim = np.array([ax.get_xlim(), ax.get_ylim()])
vmin, vmax = da.nanmin(cropdata).compute(), da.nanmax(cropdata).compute()

for i in range(len(cropdata)):
    plt.imshow(
        np.where(coarse_mask, cropdata[i], np.nan).T,
        extent=base_extent + np.array([cpc[0, i], cpc[0, i],
                                       cpc[1, i], cpc[1, i]]),
        origin='lower',
        #alpha=0.5,
        cmap='gray',
        vmin=vmin,
        vmax=vmax,
    )
    plt.annotate(str(cropindices[i]), +cpc[:, i],
                 bbox=dict(facecolor='white', alpha=0.4, edgecolor='none'))
plt.colorbar()
    will fetch the data at the URL: http://localhost/tiles/0/1

    This assumes that the data is in CoverageJSON format, and does the work
    of fetching the data, parsing it, and extracting the actual data as a
    numpy array.
    """
    for axis, tile_index in zip(axis_names, tile_indices):
        url_template = url_template.replace('{' + axis + '}', str(tile_index))
    # Debug line: uncomment to see which tiles are fetched.
    # Note that when printing these may get confused due to multithreading
    # print('fetching tile from', url_template)
    tile_data = json.loads(get_data(url_template))
    tile_values = np.array(tile_data['values'], dtype=float).reshape(tile_data['shape'])
    return tile_values


if __name__ == '__main__':
    # Usage example.
    arrs = get_dask_arrays('http://godiva.rdg.ac.uk/coverage/sst-tiled.json')
    print("Created dask array")
    sst = arrs['analysed_sst-yx_tiling']
    print('Shape:', sst.shape)
    print("Got array, calculating means:")
    print('Northern Eighth', da.nanmean(sst[0, :450, :]).compute())
    print('Equatorial Quarter', da.nanmean(sst[0, 1350:2250, :]).compute())
    print('Southern Eighth', da.nanmean(sst[0, 3150:, :]).compute())

    # Note that even though we defined c100, each tile is still fetched for each calculation.
    # That's because we've used a naive fetch method, with no caching
    c100 = sst[0, 1700:1900, 3500:3700]
    print('Central 100 points', da.nanmean(c100).compute())
    print('Central 100 points Min/Max', da.nanmin(c100).compute(), da.nanmax(c100).compute())
def get_sample_from_bil_info(self, data, fill_value=np.nan, output_shape=None):
    if fill_value is None:
        fill_value = np.nan
    # FIXME: can this be made into a dask construct?
    cols, lines = np.meshgrid(np.arange(data['x'].size),
                              np.arange(data['y'].size))
    cols = da.ravel(cols)
    lines = da.ravel(lines)
    try:
        self.valid_input_index = self.valid_input_index.compute()
    except AttributeError:
        pass
    vii = self.valid_input_index.squeeze()
    try:
        self.index_array = self.index_array.compute()
    except AttributeError:
        pass

    # ia contains reduced (valid) indices of the source array, and has the
    # shape of the destination array
    ia = self.index_array
    rlines = lines[vii][ia]
    rcols = cols[vii][ia]

    slices = []
    mask_slices = []
    mask_2d_added = False
    coords = {}
    try:
        # FIXME: Use same chunk size as input data
        coord_x, coord_y = self.target_geo_def.get_proj_vectors_dask()
    except AttributeError:
        coord_x, coord_y = None, None

    for _, dim in enumerate(data.dims):
        if dim == 'y':
            slices.append(rlines)
            if not mask_2d_added:
                mask_slices.append(ia >= self.target_geo_def.size)
                mask_2d_added = True
            if coord_y is not None:
                coords[dim] = coord_y
        elif dim == 'x':
            slices.append(rcols)
            if not mask_2d_added:
                mask_slices.append(ia >= self.target_geo_def.size)
                mask_2d_added = True
            if coord_x is not None:
                coords[dim] = coord_x
        else:
            slices.append(slice(None))
            mask_slices.append(slice(None))
            try:
                coords[dim] = data.coords[dim]
            except KeyError:
                pass

    res = data.values[slices]
    res[mask_slices] = fill_value

    try:
        p_1 = res[:, :, 0]
        p_2 = res[:, :, 1]
        p_3 = res[:, :, 2]
        p_4 = res[:, :, 3]
    except IndexError:
        p_1 = res[:, 0]
        p_2 = res[:, 1]
        p_3 = res[:, 2]
        p_4 = res[:, 3]

    s__, t__ = self.bilinear_s, self.bilinear_t

    res = (p_1 * (1 - s__) * (1 - t__) +
           p_2 * s__ * (1 - t__) +
           p_3 * (1 - s__) * t__ +
           p_4 * s__ * t__)

    epsilon = 1e-6
    data_min = da.nanmin(data) - epsilon
    data_max = da.nanmax(data) + epsilon

    idxs = (res > data_max) | (res < data_min)
    res = da.where(idxs, fill_value, res)

    shp = self.target_geo_def.shape
    if data.ndim == 3:
        res = da.reshape(res, (res.shape[0], shp[0], shp[1]))
    else:
        res = da.reshape(res, (shp[0], shp[1]))

    res = DataArray(da.from_array(res, chunks=CHUNK_SIZE),
                    dims=data.dims, coords=coords)

    return res
def composite(src_fps, save_loc, save_nam, method="mean", dt="default"): """Creates a composite from multiple rasters. Individual rasters have to be of the same size (extents, pixel size, data type). Multiple compositing are available, including mean, min, max, median etc. Parameters ---------- src_fps : list(str) List of paths to source files. save_loc : str Path to save folder. save_nam : str Name of the file to be saved. method : str Compositing method, either "mean", "min", "max" or "median". dt : str(optional) Orbit direction, either "DES" or "ASC" (required for generating previews). Returns ------- out_pth : str Absolute path to the product. """ # Make sure save location exists os.makedirs(save_loc, exist_ok=True) # Save TIFF metadata for output with rasterio.open(src_fps[0]) as rst: out_meta = rst.profile.copy() # Lazily load files into DASK ARRAYS print(f"#\n# Preparing Dask arrays...") chunks = {'band': 1, 'x': 1024, 'y': 1024} lazy_arrays = [xr.open_rasterio(fp, chunks=chunks) for fp in src_fps] stacked = da.concatenate(lazy_arrays, axis=0) stacked[stacked == 0] = np.nan # Calculate composite for selected method with dask print(f"# Compositing ({method}) using Dask...") if method == 'mean': comp_out = da.nanmean(stacked, axis=0, keepdims=True).compute() elif method == 'median': comp_out = da.nanmedian(stacked, axis=0, keepdims=True).compute() elif method == 'max': comp_out = da.nanmax(stacked, axis=0, keepdims=True).compute() elif method == 'min': comp_out = da.nanmin(stacked, axis=0, keepdims=True).compute() else: raise Exception('{} is not a valid compositing ' 'method!'.format(method)) # ---------------------------------------------------------------------------- # SAVE RESULTS TO FILES # ---------------------------------------------------------------------------- # Save composite to GeoTIFF tif_time = time.time() print("#\n# Saving composite image to TIFF...") out_nam = save_nam + ".tif" out_pth = os.path.join(save_loc, out_nam) out_meta.update(bigtiff="yes", compress='lzw') with rasterio.open(out_pth, "w", **out_meta) as dest: dest.write(comp_out) tif_time = time.time() - tif_time print(f"# Time (TIFF): {tif_time:.2f} seconds") # # Save preview file as JPEG # jpg_time = time.time() # print("#\n# Saving preview image to JPEG...") # # Pickle array for passing it to plot_preview() # spt = os.path.join(save_loc, "temp_array.p") # with open(spt, "wb") as pf: # pickle.dump(comp_out, pf) # comp_out = None # try: # plot_preview(spt, dt, out_pth[:-3] + "jpg") # except MemoryError as me: # print("# Memory error occurred, could not save to JPEG") # print(me) # finally: # # delete pickle # os.remove(spt) # jpg_time = time.time() - jpg_time # print(f"# Time (JPEG): {jpg_time:.2f} seconds") return out_pth
def composite(src_fps, save_loc, save_nam, method="median", comp_mask="all_bad", bbox=None): # Prepare save location save_dir = os.path.join(save_loc, save_nam) if not os.path.exists(save_dir): os.mkdir(save_dir) # Get extents main_extents = output_image_extent(src_fps, bbox) # Obtain propertis of output array (same for all bands/images) out_extents = main_extents['bounds'] out_w = main_extents['width'] out_h = main_extents['height'] nr_bands = main_extents['bandsCount'] # Initiate arrays for storing noumber of available & good observations nobs = np.zeros((out_h, out_w), dtype=np.int8) nok = nobs.copy() # Create temp dir if it doesn't exist sav_dir = '.\\tmp' if not os.path.exists(sav_dir): os.mkdir(sav_dir) # MAIN LOOP FOR COMPOSITING tTim_A = time.time() tmp_sav_pth = [] for band in range(nr_bands): print("#\n# Creating composite for Band {}".format(band+1)) comp_stack = [] # Loop all images for i, fp in enumerate(src_fps): str_time = time.time() # Open data set src = rasterio.open(fp) # Save copy of profile for writing tiff at the end if band == 0 and i == 0: out_meta = src.profile.copy() print("# Processing Image {}.".format(i+1)) # Skip Reading the image if bbox is out of bounds xL, yD, xR, yU = [xy for xy in src.bounds] xL_out, yD_out, xR_out, yU_out = out_extents chk_bbox = (xL > xR_out or yD > yU_out or xR < xL_out or yU < yD_out) if chk_bbox: print('# Image {} not included (out of bounds).'.format(i)) break # Calculate offset for reading and slicing win, sl_x, sl_y = image_offset(out_extents, src) # ------------------------------ # Read image and store to pickle # ------------------------------ # Set offset Window for reading of TIF subset offset = win # Initiate array for output comp_band = np.full((out_h, out_w), np.nan, dtype=np.float32) # Read image and save to pickle print("# Reading the image.") if band == 0: tmp_read = src.read(window=offset) for nc in range(1, nr_bands): img_nam = ('img' + str(i+1).zfill(2) + "_b" + str(nc+1).zfill(2) + '.p') img_pth = os.path.join(sav_dir, img_nam) pickle.dump(tmp_read[nc], open(img_pth, "wb")) tmp_read = tmp_read[0] else: img_nam = ('img' + str(i+1).zfill(2) + "_b" + str(band+1).zfill(2) + '.p') img_pth = os.path.join(sav_dir, img_nam) tmp_read = pickle.load(open(img_pth, "rb")) # Read the image into the array comp_band[sl_y[0]:sl_y[1], sl_x[0]:sl_x[1]] = tmp_read tmp_read = None src.close() # ------------------------------ # determine bad pixels from mask # ------------------------------ print("# Determining bad pixels.") if band == 0: # Get index of mask idx_bad = get_mask_idx(fp, offset, comp_mask, dilate=-1) # Get index of background idx_bck = get_mask_idx(fp, offset, "background") # Update nok and nobs nobs[sl_y[0]:sl_y[1], sl_x[0]:sl_x[1]] += 1 nok[sl_y[0]:sl_y[1], sl_x[0]:sl_x[1]] += 1 nok[idx_bad[0][0]+sl_y[0], idx_bad[0][1]+sl_x[0]] += -1 nobs[idx_bck[0][0]+sl_y[0], idx_bck[0][1]+sl_x[0]] += -1 # Save index to pickle for later use idx_nam = 'idxBad_' + str(i+1).zfill(2) + '.p' idx_pth = os.path.join(sav_dir, idx_nam) pickle.dump(idx_bad, open(idx_pth, "wb")) idx_bck = None else: # Read from Pickle idx_nam = 'idxBad_' + str(i+1).zfill(2) + '.p' idx_pth = os.path.join(sav_dir, idx_nam) idx_bad = pickle.load(open(idx_pth, "rb")) # Apply mask to image if idx_bad[1] > 0: comp_band[idx_bad[0][0]+sl_y[0], idx_bad[0][1]+sl_x[0]] = np.nan idx_bad = None # Stack comp_band array into Dask Array comp_stack.append(da.from_array(comp_band, chunks=(1024, 1024))) # Close the array to save memory comp_band = None end_time = time.time() print('# 
--- Time: %s seconds ---' % (end_time-str_time)) # Stack all images into 1 array stacked = da.stack(comp_stack, axis=0) # Calculate composite for selected method with dask print("# Compositing Band {}".format(band+1)) str_time = time.time() if method == 'mean': comp_out = da.nanmean(stacked, axis=0, keepdims=True).compute() elif method == 'median': comp_out = da.nanmedian(stacked, axis=0, keepdims=True).compute() elif method == 'max': comp_out = da.nanmax(stacked, axis=0, keepdims=True).compute() elif method == 'min': comp_out = da.nanmin(stacked, axis=0, keepdims=True).compute() else: raise Exception('{} is not a valid compositing ' 'method!'.format(method)) end_time = time.time() print('# --- Time: %s seconds ---' % (end_time-str_time)) # After one band is resolved, save to temp file and release memory by # deleting the array if nr_bands > 1: print('# Saving temporary composite file for this band.') # Create file name and save using pickle sav_fil = 'b_' + str(band+1).zfill(2) + '.p' sav_pth = os.path.join(sav_dir, sav_fil) pickle.dump(comp_out, open(sav_pth, "wb")) # Add to savePth list with filenames tmp_sav_pth.append(sav_pth) # Clean up workspace comp_out = None tTim_B = time.time() print('--- Total time: %s seconds --- \n' % (tTim_B - tTim_A)) # ---------------------------------------------------------------------------- # OUT OF THE COMPOSITE LOOP RESTORE SAVED FILES AND BUIL TIF # ---------------------------------------------------------------------------- if nr_bands > 1: print("# Restoring saved bands.") str_time = time.time() # Initiate output array comp_out = np.full((nr_bands, out_h, out_w), np.nan, dtype=np.float32) for bnd, pth in enumerate(tmp_sav_pth): comp_out[bnd, :, :] = pickle.load(open(pth, "rb")) # Remove temporary folder rmtree(sav_dir, ignore_errors=True) end_time = time.time() print('--- Time: %s seconds ---' % (end_time-str_time)) # ---------------------------------------------------------------------------- # SAVE RESULTS TO TIF # ---------------------------------------------------------------------------- print("# Saving composite image to TIFF.") str_time = time.time() # Save composite out_nam = save_nam + "_composite.tif" out_pth = os.path.join(save_dir, out_nam) out_px = out_meta["transform"][0] out_py = out_meta["transform"][4] out_trans = Affine(out_px, 0.0, xL_out, 0.0, out_py, yU_out) out_meta.update( height=comp_out.shape[1], width=comp_out.shape[2], transform=out_trans, bigtiff="yes" ) with rasterio.open(out_pth, "w", **out_meta) as dest: dest.write(comp_out) # Save nok mask out_nam = save_nam + "_nok.tif" out_pth = os.path.join(save_dir, out_nam) nok_meta = out_meta.copy() nok_meta.update( count=1, dtype="int8" ) with rasterio.open(out_pth, "w", **nok_meta) as dest: dest.write(np.expand_dims(nok, axis=0)) # Save nobs mask out_nam = save_nam + "_nobs.tif" out_pth = os.path.join(save_dir, out_nam) with rasterio.open(out_pth, "w", **nok_meta) as dest: dest.write(np.expand_dims(nobs, axis=0)) end_time = time.time() print('--- Time: %s seconds ---' % (end_time-str_time)) tTim_B = time.time() print('\n--- Total time: %s seconds --- \n' % (tTim_B - tTim_A))
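# Both composite() variants above boil down to the same core idea. A minimal
# in-memory sketch (hypothetical arrays, no rasterio I/O): mask invalid pixels
# to NaN, stack the scenes lazily, and reduce with the chosen NaN-aware
# statistic.
import numpy as np
import dask.array as da

scenes = [np.array([[0.0, 5.0], [2.0, 8.0]]),      # 0 marks invalid pixels
          np.array([[3.0, 0.0], [4.0, 6.0]]),
          np.array([[1.0, 7.0], [0.0, 0.0]])]

stacked = da.stack([da.from_array(s, chunks=(2, 2)) for s in scenes], axis=0)
stacked = da.where(stacked == 0, np.nan, stacked)

print(da.nanmedian(stacked, axis=0).compute())
print(da.nanmax(stacked, axis=0).compute())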
def scale_varr_da(varr, scale=(0, 1)):
    return ((varr - darr.nanmin(varr)) * (scale[1] - scale[0])
            / (darr.nanmax(varr) - darr.nanmin(varr))) + scale[0]
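# A short usage sketch of scale_varr_da, assuming `darr` is the dask.array
# module alias used in that file. NaNs pass through unchanged.
import numpy as np
import dask.array as darr

varr = darr.from_array(np.array([2.0, np.nan, 4.0, 10.0]), chunks=2)
scaled = scale_varr_da(varr)
print(scaled.compute())   # [0.   nan  0.25 1.  ]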
def _apply(func, datasets, chunk=CHUNK, pad=None, relabel=False, stack=False,
           compute=True, out=None, normalize=False, **kwargs):
    """
    Applies a function to a given set of datasets.

    Wraps a standard function call of the form:

        func(*datasets, **kwargs)

    Named parameters give extra functionality.

    Parameters
    ----------
    func: callable
        Function to be mapped across datasets.
    datasets: list of numpy array-like
        Input datasets.
    chunk: boolean
        If `True` then input datasets will be assumed to be `Dask.Array`s
        and the function will be mapped across array blocks.
    pad: None, int or iterable
        The padding to apply (only if `chunk = True`). If `pad != None`
        then `dask.array.ghost.map_overlap` will be used to map the
        function across overlapping blocks, otherwise
        `dask.array.map_blocks` will be used.
    relabel: boolean
        Some of the labelling functions will yield local labelling if
        `chunk=True`. If `func` is a labelling function, set
        `relabel = True` to map the result for global consistency.
        See `survos2.improc.utils.dask_relabel_chunks` for more details.
    compute: boolean
        If `True` the result will be computed and returned in numpy array
        form, otherwise a `dask.delayed` will be returned if `chunk = True`.
    out: None or numpy array-like
        If `out != None` then the result will be stored there.
    **kwargs: other keyword arguments
        Arguments to be passed to `func`.

    Returns
    -------
    result: numpy array-like
        The computed result if `compute = True` or `chunk = False`, the
        result of the lazy wrapping otherwise.
    """
    if stack and len(datasets) > 1:
        dataset = da.stack(datasets, axis=0)
        dataset = da.rechunk(dataset,
                             chunks=(dataset.shape[0], ) + dataset.chunks[1:])
        datasets = [dataset]

    if chunk == True:
        kwargs.setdefault('dtype', out.dtype if out else datasets[0].dtype)
        kwargs.setdefault('drop_axis', 0 if stack else None)

        if pad is None or pad == False:
            result = da.map_blocks(func, *datasets, **kwargs)
        elif len(datasets) == 1:
            if np.isscalar(pad):
                pad = [pad] * datasets[0].ndim
            if stack:
                pad[0] = 0  # don't pad feature channel
                depth = {i: d for i, d in enumerate(pad)}
                trim = {i: d for i, d in enumerate(pad[1:])}
            else:
                depth = trim = {i: d for i, d in enumerate(pad)}
            g = da.ghost.ghost(datasets[0], depth=depth, boundary='reflect')
            r = g.map_blocks(func, **kwargs)
            result = da.ghost.trim_internal(r, trim)
        else:
            raise ValueError('`pad` only works with single')

        rchunks = result.chunks

        if not relabel and normalize:
            result = result / da.nanmax(da.fabs(result))

        if out is not None:
            result.store(out, compute=True)
        elif compute:
            result = result.compute()

        if relabel:
            if out is not None:
                result = dask_relabel_chunks(da.from_array(out, chunks=rchunks))
                result.store(out, compute=True)
            else:
                result = dask_relabel_chunks(
                    da.from_array(result, chunks=rchunks))
                if compute:
                    result = result.compute()
    else:
        result = func(*datasets, **kwargs)
        if out is not None:
            out[...] = result

    if out is None:
        return result