def trim_array(array, pad=5):
    """Trim an array to the bounding box of its truthy data plus padding.

    Parameters
    ----------
    array : array-like
        Array to trim; it is indexed with two slices, so it is expected to be
        two-dimensional. Rows and columns count as "data" when they contain
        at least one truthy element.
    pad : int
        Padding radius. The value is doubled internally, so up to ``2 * pad``
        extra rows/columns are kept on each side of the data.

    Returns
    -------
    array-like
        Slice of `array` covering the data and the padding. The window is
        limited to the extent of `array`, so less padding is kept on sides
        where the data lies close to an edge.

    Notes
    -----
    An input without any truthy element has no bounding box; the first/last
    index lookup then fails (``IndexError`` for NumPy inputs).
    """
    pad = pad * 2

    # `da` is the module-level array namespace (presumably dask.array).
    rows = da.any(array, axis=1)
    cols = da.any(array, axis=0)

    # First and last index holding data along each axis.
    ymin, ymax = np.where(rows)[0][[0, -1]]
    xmin, xmax = np.where(cols)[0][[0, -1]]

    # Clamp the lower bounds at 0: a negative slice start is interpreted as an
    # offset from the END of the axis, which would return an empty or wrong
    # window whenever the data is closer to the edge than the padding.
    # Upper bounds past the end of the axis are already truncated by slicing.
    y_start = max(ymin - pad, 0)
    x_start = max(xmin - pad, 0)

    return array[y_start:(ymax + 1 + pad), x_start:(xmax + 1 + pad)]
def first_run(
    da: xr.DataArray,
    window: int,
    dim: str = "time",
    coord: Optional[Union[str, bool]] = False,
    ufunc_1dim: Union[str, bool] = "from_context",
) -> xr.DataArray:
    """Return the index of the first item of the first run of at least a given length.

    Parameters
    ----------
    da : xr.DataArray
      Input N-dimensional DataArray (boolean).
    window : int
      Minimum duration of consecutive run to accumulate values.
      When equal to 1, an optimized version of the algorithm is used.
    dim : str
      Dimension along which to calculate consecutive run (default: 'time').
    coord : Optional[Union[str, bool]]
      If not False, the function returns values along `dim` instead of indexes.
      If `dim` has a datetime dtype, `coord` can also be a str of the name of the
      DateTimeAccessor object to use (ex: 'dayofyear').
    ufunc_1dim : Union[str, bool]
      Use the 1d 'ufunc' version of this function : default (auto) will attempt to select optimal
      usage based on number of data points. Using ufunc_1dim=True is typically more efficient
      for DataArray with a small number of grid points.
      Ignored when `window=1`.

    Returns
    -------
    xr.DataArray
      Index (or coordinate if `coord` is not False) of first item in first valid run.
      Returns np.nan if there are no valid runs.
    """
    ufunc_1dim = use_ufunc(ufunc_1dim, da, dim=dim)

    # We expect a boolean array, but there could be NaNs nonetheless.
    da = da.fillna(0)

    if window == 1:
        # A run of length 1 is simply the first truthy element along `dim`.
        # `np.nan` is used instead of the `np.NaN` alias, which was removed
        # in NumPy 2.0.
        out = xr.where(da.any(dim=dim), da.argmax(dim=dim), np.nan)
    elif ufunc_1dim:
        out = first_run_ufunc(x=da, window=window, dim=dim)
    else:
        da = da.astype("int")

        # Positional index along `dim`, broadcast to the shape of `da`.
        i = xr.DataArray(np.arange(da[dim].size), dims=dim)
        ind = xr.broadcast(i, da)[0].transpose(*da.dims)
        if isinstance(da.data, dsk.Array):
            ind = ind.chunk(da.chunks)

        # A full window of ones sums to at least `window`.
        wind_sum = da.rolling({dim: window}).sum(skipna=False)

        # Remove window - 1 as the rolling result index is the last element
        # of the moving window.
        out = ind.where(wind_sum >= window).min(dim=dim) - (window - 1)

    if coord:
        crd = da[dim]
        if isinstance(coord, str):
            crd = getattr(crd.dt, coord)

        out = lazy_indexing(crd, out)

    if dim in out.coords:
        out = out.drop_vars(dim)

    return out