def prepare(self, da, freq, **indexer):
    """Build the null mask and the expected number of days per period.

    Parameters
    ----------
    da : xr.DataArray
        Input data with a ``time`` coordinate.
    freq : str
        Resampling frequency defining the periods.
    **indexer : {dim: indexer, }, optional
        Time attribute and values over which to subset the array. Forwarded
        to ``self.is_null`` and ``generic.select_time``.

    Returns
    -------
    null, count
        The object returned by ``self.is_null`` and a DataArray holding the
        expected number of days for each period.
    """
    base_freq, _ = self.split_freq(freq)
    null = self.is_null(da, freq, **indexer)
    summed = null.sum(dim="time")
    period_index = summed.indexes["time"]

    # Frequencies ending in "S" treat the resampled index as period starts;
    # any other frequency treats it as period ends.
    if base_freq.endswith("S"):
        period_start = period_index
        period_end = period_index.shift(1, freq=freq)
    else:
        period_end = period_index
        period_start = period_index.shift(-1, freq=freq)

    if not indexer:
        # Without an indexer the expected count is simply the period length in days.
        n_days = (period_end - period_start).days
        count = xr.DataArray(n_days.values, coords={"time": summed.time}, dims="time")
        return null, count

    # With an indexer, build a full synthetic daily series over the same span,
    # apply the same selection and count what remains in each period.
    first_day = str(period_start[0].date())
    last_day = str(period_end[-1].date())
    if isinstance(period_index, xr.CFTimeIndex):
        calendar = da.time.encoding.get("calendar")
        days = xr.cftime_range(first_day, last_day, freq="D", calendar=calendar)
    else:
        days = pd.date_range(first_day, last_day, freq="D")

    synthetic = xr.DataArray(data=np.ones(len(days)), coords={"time": days}, dims=("time",))
    kept = generic.select_time(synthetic, **indexer)
    count = kept.notnull().resample(time=freq).sum(dim="time")
    return null, count
def is_null(da, freq, **indexer):
    """Return the null mask of the selected values, resampled at `freq`.

    Parameters
    ----------
    da : xr.DataArray
        Input data with a ``time`` coordinate.
    freq : str
        Resampling frequency passed to ``resample(time=freq)``.
    **indexer : {dim: indexer, }, optional
        Time selection forwarded to ``generic.select_time``.

    Returns
    -------
    The resample object built from the boolean null mask of the selection.

    Raises
    ------
    ValueError
        If the selection contains no time steps.
    """
    subset = generic.select_time(da, **indexer)
    if not subset.time.size:
        raise ValueError("No data for selected period.")

    mask = subset.isnull()
    return mask.resample(time=freq)
def is_null(da, freq, **indexer):
    """Return a boolean array indicating which values are null.

    Parameters
    ----------
    da : xr.DataArray
        Input data with a ``time`` coordinate.
    freq : str or None
        Resampling frequency. When falsy, the mask is returned as is,
        without resampling.
    **indexer : {dim: indexer, }, optional
        Time selection forwarded to ``generic.select_time``.

    Returns
    -------
    The null mask of the selection, wrapped in ``resample(time=freq)`` when
    `freq` is given.

    Raises
    ------
    ValueError
        If the selection contains no time steps.
    """
    subset = generic.select_time(da, **indexer)
    if not subset.time.size:
        raise ValueError("No data for selected period.")

    mask = subset.isnull()
    return mask.resample(time=freq) if freq else mask
def prepare(self, da, freq, src_timestep, **indexer):
    """Prepare arrays to be fed to the `is_missing` function.

    Parameters
    ----------
    da : xr.DataArray
        Input data.
    freq : str
        Resampling frequency defining the periods defined in
        http://pandas.pydata.org/pandas-docs/stable/timeseries.html#resampling.
    src_timestep : {"D", "H"}
        Expected input frequency.
    **indexer : {dim: indexer, }, optional
        Time attribute and values over which to subset the array. For example,
        use season='DJF' to select winter values, month=1 to select January, or
        month=[6,7,8] to select summer months. If no indexer is given, all
        values are considered.

    Returns
    -------
    xr.DataArray, xr.DataArray
        Boolean array indicating which values are null, array of expected
        number of valid values.

    Notes
    -----
    If `freq=None` and an indexer is given, then missing values during period
    at the start or end of array won't be flagged.
    """
    # This function can probably be made simpler once CFPeriodIndex is implemented.
    null = self.is_null(da, freq, **indexer)
    base_freq, _ = self.split_freq(freq)
    summed = null.sum(dim="time")

    # Work out the start and end of each period. The resampled index is only
    # read in the branches that need it.
    if base_freq.endswith("S"):
        period_start = summed.indexes["time"]
        period_end = period_start.shift(1, freq=freq)
    elif base_freq:
        period_end = summed.indexes["time"]
        period_start = period_end.shift(-1, freq=freq)
    else:
        # No frequency: a single period from the first to the last timestamp of `da`.
        full_index = da.time.to_index()
        period_start, period_end = full_index[:1], full_index[-1:]

    if not indexer:
        # Without an indexer, the expected count is derived from the period
        # length expressed in units of `src_timestep`.
        span = (period_end - period_start).astype(_np_timedelta64[src_timestep])
        if freq:
            count = xr.DataArray(span.values, coords={"time": summed.time}, dims="time")
        else:
            # First-to-last difference is one step short of the number of timestamps.
            count = xr.DataArray(span.values[0] + 1)
        return null, count

    # With an indexer, create a full synthetic time series over the same span,
    # apply the same selection and count the values that remain.
    steps = date_range(
        period_start[0],
        period_end[-1],
        freq=src_timestep,
        calendar=get_calendar(da),
    )
    synthetic = xr.DataArray(data=np.ones(len(steps)), coords={"time": steps}, dims=("time",))
    present = generic.select_time(synthetic, **indexer).notnull()
    if freq:
        count = present.resample(time=freq).sum(dim="time")
    else:
        count = present.sum(dim="time")
    return null, count
def compute(da: xr.DataArray, **indexer):
    """Average `da` along ``time`` after sub-selecting it with `indexer`.

    Parameters
    ----------
    da : xr.DataArray
        Input data with a ``time`` dimension.
    **indexer : {dim: indexer, }, optional
        Time selection forwarded to ``select_time``.

    Returns
    -------
    The mean of the selected values over ``time``, computed with
    ``keep_attrs=True``.
    """
    return select_time(da, **indexer).mean(dim="time", keep_attrs=True)