def test_cftime_type_error():
    """Interpolating onto cftime dates built with another calendar must raise TypeError."""
    source_times = xr.cftime_range('2000', periods=24, freq='D')
    values = np.arange(24)
    da = xr.DataArray(values, coords=[source_times], dims='time')

    # The targets request the 'noleap' calendar; the source index passes no calendar.
    noleap_targets = xr.cftime_range(
        '2000-01-01T12:00:00', periods=3, freq='D', calendar='noleap'
    )
    with pytest.raises(TypeError):
        da.interp(time=noleap_targets)
def test_cftime():
    """Interpolate daily integer data onto midday stamps and expect the half-way values."""
    daily = xr.cftime_range('2000', periods=24, freq='D')
    da = xr.DataArray(np.arange(24), coords=[daily], dims='time')

    midday = xr.cftime_range('2000-01-01T12:00:00', periods=3, freq='D')
    expected = xr.DataArray([0.5, 1.5, 2.5], coords=[midday], dims=['time'])

    actual = da.interp(time=midday)
    assert_allclose(actual, expected)
def test_cftimeindex_sub_cftimeindex(calendar):
    """The difference of two cftime indexes should be a pandas TimedeltaIndex of 2 days."""
    base = xr.cftime_range('2000', periods=5, calendar=calendar)
    shifted = base.shift(2, 'D')
    two_days = timedelta(days=2)
    expected = pd.TimedeltaIndex([two_days] * 5)

    result = shifted - base

    assert result.equals(expected)
    assert isinstance(result, pd.TimedeltaIndex)
def test_cftimeindex_sub_timedeltaindex(calendar):
    """Subtracting a TimedeltaIndex of 2 days should equal shifting the index back 2 days."""
    index = xr.cftime_range('2000', periods=5, calendar=calendar)
    two_days = timedelta(days=2)
    deltas = pd.TimedeltaIndex([two_days] * 5)
    expected = index.shift(-2, 'D')

    result = index - deltas

    assert result.equals(expected)
    assert isinstance(result, CFTimeIndex)
def test_cftime_datetime_mean():
    """Check ``mean`` on cftime-valued arrays: scalar, 1-D and 2-D inputs."""
    times = cftime_range('2000', periods=4)
    da = DataArray(times, dims=['time'])

    # The mean of a single element is that element.
    first = da.isel(time=0)
    assert first.mean() == first

    # Both the 1-D array and its 2x2 reshape are compared against the same date.
    expected = DataArray(times.date_type(2000, 1, 2, 12))
    assert_equal(da.mean(), expected)

    da_2d = DataArray(times.values.reshape(2, 2))
    assert_equal(da_2d.mean(), expected)
def test_to_datetimeindex(calendar, unsafe):
    """Convert a cftime index to a DatetimeIndex; expect a warning for non-standard calendars unless ``unsafe``."""
    index = xr.cftime_range('2000', periods=5, calendar=calendar)
    expected = pd.date_range('2000', periods=5)

    if calendar not in _NON_STANDARD_CALENDARS or unsafe:
        result = index.to_datetimeindex(unsafe=unsafe)
    else:
        # Non-standard calendar converted without ``unsafe``: a RuntimeWarning is required.
        with pytest.warns(RuntimeWarning, match='non-standard'):
            result = index.to_datetimeindex()

    assert result.equals(expected)
    np.testing.assert_array_equal(result, expected)
    assert isinstance(result, pd.DatetimeIndex)
def test_cftime_single_string():
    """Interpolate onto a single date given as a string."""
    from cftime import DatetimeProlepticGregorian

    daily = xr.cftime_range('2000', periods=24, freq='D')
    da = xr.DataArray(np.arange(24), coords=[daily], dims='time')
    target = '2000-01-01T12:00'

    actual = da.interp(time=target)

    # The expected coordinate is the same string parsed into a cftime date.
    parsed_target = _parse_array_of_cftime_strings(
        np.array(target), DatetimeProlepticGregorian
    )
    expected = xr.DataArray(0.5, coords={'time': parsed_target})
    assert_allclose(actual, expected)
def test_cftime_list_of_strings():
    """Interpolate onto several dates given as a list of strings."""
    from cftime import DatetimeProlepticGregorian

    daily = xr.cftime_range('2000', periods=24, freq='D')
    da = xr.DataArray(np.arange(24), coords=[daily], dims='time')
    targets = ['2000-01-01T12:00', '2000-01-02T12:00', '2000-01-03T12:00']

    actual = da.interp(time=targets)

    # The expected coordinate is the same strings parsed into cftime dates.
    parsed_targets = _parse_array_of_cftime_strings(
        np.array(targets), DatetimeProlepticGregorian
    )
    expected = xr.DataArray(
        [0.5, 1.5, 2.5], coords=[parsed_targets], dims=['time']
    )
    assert_allclose(actual, expected)
def test_calendars(calendar):
    """Resampling a cftime-indexed array must match resampling the pandas-indexed one."""
    # Limited testing for non-standard calendars
    resample_kwargs = dict(
        time='8001T',
        closed=None,
        label=None,
        base=17,
        loffset=datetime.timedelta(hours=12),
    )
    range_kwargs = dict(start='2004-01-01T12:07:01', periods=7, freq='3D')

    xr_index = xr.cftime_range(calendar=calendar, **range_kwargs)
    pd_index = pd.date_range(**range_kwargs)

    da_cftime = da(xr_index).resample(**resample_kwargs).mean()
    da_datetime = da(pd_index).resample(**resample_kwargs).mean()

    # Convert the cftime result's index so both arrays can be compared.
    da_cftime['time'] = da_cftime.indexes['time'].to_datetimeindex()
    xr.testing.assert_identical(da_cftime, da_datetime)
def test_datetime_to_numeric_cftime():
    """Check ``duck_array_ops.datetime_to_numeric`` on cftime dates: plain, with offset, with dtype."""
    times = cftime_range('2000', periods=5, freq='7D').values

    # No explicit offset: hours are expected to start at zero.
    hours = duck_array_ops.datetime_to_numeric(times, datetime_unit='h')
    np.testing.assert_array_equal(hours, 24 * np.arange(0, 35, 7))

    # Explicit offset at the second date: the first value becomes negative.
    second = times[1]
    hours_from_second = duck_array_ops.datetime_to_numeric(
        times, offset=second, datetime_unit='h'
    )
    np.testing.assert_array_equal(hours_from_second, 24 * np.arange(-7, 28, 7))

    # Explicit output dtype.
    dtype = np.float32
    hours_f32 = duck_array_ops.datetime_to_numeric(
        times, datetime_unit='h', dtype=dtype
    )
    np.testing.assert_array_equal(
        hours_f32, 24 * np.arange(0, 35, 7).astype(dtype)
    )
def test_datetime_to_numeric_cftime():
    """Check ``utils.datetime_to_numeric`` on a cftime DataArray: plain, with offset, with dtype."""
    times = xr.cftime_range('2000', periods=5, freq='7D')
    da = xr.DataArray(times, coords=[times], dims=['time'])

    # No explicit offset: hours are expected to start at zero.
    expected = 24 * xr.DataArray(np.arange(0, 35, 7), coords=da.coords)
    result = utils.datetime_to_numeric(da, datetime_unit='h')
    assert_identical(result, expected)

    # Explicit offset at the second time step.
    second = da.isel(time=1)
    expected = 24 * xr.DataArray(np.arange(-7, 28, 7), coords=da.coords)
    result = utils.datetime_to_numeric(da, offset=second, datetime_unit='h')
    assert_identical(result, expected)

    # Explicit output dtype.
    dtype = np.float32
    expected = 24 * xr.DataArray(
        np.arange(0, 35, 7), coords=da.coords
    ).astype(dtype)
    result = utils.datetime_to_numeric(da, datetime_unit='h', dtype=dtype)
    assert_identical(result, expected)
def test_resample(freqs, closed, label, base):
    """cftime resampling must mirror pandas: an identical result, or a ValueError in both."""
    initial_freq, resample_freq = freqs
    index_kwargs = dict(
        start='2000-01-01T12:07:01', periods=5, freq=initial_freq
    )
    datetime_index = pd.date_range(**index_kwargs)
    cftime_index = xr.cftime_range(**index_kwargs)
    resample_kwargs = dict(
        time=resample_freq,
        closed=closed,
        label=label,
        base=base,
        loffset='12H',
    )

    try:
        da_datetime = da(datetime_index).resample(**resample_kwargs).mean()
    except ValueError:
        # pandas rejected the arguments, so the cftime path must reject them too.
        with pytest.raises(ValueError):
            da(cftime_index).resample(**resample_kwargs).mean()
    else:
        da_cftime = da(cftime_index).resample(**resample_kwargs).mean()
        da_cftime['time'] = da_cftime.indexes['time'].to_datetimeindex()
        xr.testing.assert_identical(da_cftime, da_datetime)
def test_calendars(calendar):
    """Resampling a cftime-indexed array must match resampling the pandas-indexed one."""
    # Limited testing for non-standard calendars
    resample_kwargs = dict(
        time="8001T",
        closed=None,
        label=None,
        base=17,
        loffset=datetime.timedelta(hours=12),
    )
    range_kwargs = dict(start="2004-01-01T12:07:01", periods=7, freq="3D")

    xr_index = xr.cftime_range(calendar=calendar, **range_kwargs)
    pd_index = pd.date_range(**range_kwargs)

    da_cftime = da(xr_index).resample(**resample_kwargs).mean()
    da_datetime = da(pd_index).resample(**resample_kwargs).mean()

    # TODO (benbovy - flexible indexes): update when CFTimeIndex is a xarray Index subclass
    pandas_index = da_cftime.xindexes["time"].to_pandas_index()
    da_cftime["time"] = pandas_index.to_datetimeindex()
    xr.testing.assert_identical(da_cftime, da_datetime)
def test_datetime_to_numeric_cftime():
    """Check ``duck_array_ops.datetime_to_numeric`` on cftime dates: plain, with offset, with float dtype."""
    times = cftime_range(
        "2000", periods=5, freq="7D", calendar="standard"
    ).values

    # No explicit offset, integer output: hours are expected to start at zero.
    hours = duck_array_ops.datetime_to_numeric(
        times, datetime_unit="h", dtype=int
    )
    np.testing.assert_array_equal(hours, 24 * np.arange(0, 35, 7))

    # Explicit offset at the second date: the first value becomes negative.
    second = times[1]
    hours_from_second = duck_array_ops.datetime_to_numeric(
        times, offset=second, datetime_unit="h", dtype=int
    )
    np.testing.assert_array_equal(hours_from_second, 24 * np.arange(-7, 28, 7))

    # Explicit float32 output dtype.
    dtype = np.float32
    hours_f32 = duck_array_ops.datetime_to_numeric(
        times, datetime_unit="h", dtype=dtype
    )
    np.testing.assert_array_equal(
        hours_f32, 24 * np.arange(0, 35, 7).astype(dtype)
    )
def test_resample(freqs, closed, label, base) -> None:
    """cftime resampling must mirror pandas: an identical result, or a ValueError in both."""
    initial_freq, resample_freq = freqs
    index_kwargs = dict(
        start="2000-01-01T12:07:01", periods=5, freq=initial_freq
    )
    datetime_index = pd.date_range(**index_kwargs)
    cftime_index = xr.cftime_range(**index_kwargs)
    resample_kwargs = dict(
        time=resample_freq,
        closed=closed,
        label=label,
        base=base,
        loffset="12H",
    )

    try:
        da_datetime = da(datetime_index).resample(**resample_kwargs).mean()
    except ValueError:
        # pandas rejected the arguments, so the cftime path must reject them too.
        with pytest.raises(ValueError):
            da(cftime_index).resample(**resample_kwargs).mean()
    else:
        da_cftime = da(cftime_index).resample(**resample_kwargs).mean()
        # TODO (benbovy - flexible indexes): update when CFTimeIndex is a xarray Index subclass
        pandas_index = da_cftime.xindexes["time"].to_pandas_index()
        da_cftime["time"] = pandas_index.to_datetimeindex()
        xr.testing.assert_identical(da_cftime, da_datetime)
def _modeloutputfactory(
    start_time="1950-01-01", end_time="2014-12-31", variable_name="fakevariable"
):
    """Build an xr.Dataset of seeded random values on a 'noleap' time axis for testing.

    Parameters
    ----------
    start_time, end_time : str-convertible
        Bounds passed (as strings) to ``xr.cftime_range``.
    variable_name : str
        Name of the single data variable; also selects the value range.

    Returns
    -------
    xr.Dataset
        One variable with dims ("time", "lon", "lat") and a "units" attribute of "K".
    """
    # (low, high) bounds handed to np.random.randint so that the test data
    # range is reasonable for the variable being tested.
    value_ranges = {
        "tasmax": (160, 340),
        "tasmin": (160, 340),
        "dtr": (1, 40),
        "pr": (0.01, 1900),
    }
    # NOTE(review): any other name (including the default "fakevariable")
    # leaves both bounds as None, which np.random.randint presumably
    # rejects -- confirm whether a fallback range is wanted.
    low_val, high_val = value_ranges.get(variable_name, (None, None))

    # Fixed seed keeps the generated data reproducible.
    np.random.seed(0)
    time = xr.cftime_range(
        start=str(start_time), end=str(end_time), calendar="noleap"
    )
    samples = np.random.randint(low_val, high_val, len(time)).astype(np.float64)

    data_vars = {
        variable_name: (
            ["time", "lon", "lat"],
            samples[:, np.newaxis, np.newaxis],
        )
    }
    coords = {
        "index": time,
        "time": time,
        "lon": (["lon"], [1.0]),
        "lat": (["lat"], [1.0]),
    }
    out = xr.Dataset(data_vars, coords=coords)
    # need to set variable units to pass xclim 0.29 check on units
    out[variable_name].attrs["units"] = "K"
    return out
def monthly_csv_to_DataArray(df, freq="MS"):
    """Flatten a year-indexed dataframe (NOAA catalog items) into a 1-D xr.DataArray.

    The frame is indexed by its "year" column, coerced to numeric, and its
    values are flattened onto a cftime axis starting at the first valid
    index label.

    Example:
        >>> cat = intake.open_catalog('master.yaml')
        >>> df = cat.climate.NOAA_correlation.read()
        >>> da = monthly_csv_to_DataArray(df)
        >>> da
        <xr.DataArray>
        ...

    """
    numeric = df.set_index("year").apply(pd.to_numeric, errors="coerce")

    # Keep only the first four characters of the first valid label when it is
    # at least that long; shorter labels are converted as they are.
    first_valid = numeric.first_valid_index()
    label = str(first_valid)
    start_year = int(label[:4]) if len(label) >= 4 else int(first_valid)

    flat_values = numeric.values.flatten()
    time_axis = xr.cftime_range(
        str(start_year), freq=freq, periods=numeric.values.size
    )
    return xr.DataArray(flat_values, dims="time", coords={"time": time_axis})
def _datafactory(
    x, start_time="1950-01-01", variable_name="fakevariable", lon=1.0, lat=1.0
):
    """Wrap a 1-D array in an xr.Dataset with a daily 'standard'-calendar time axis for testing.

    Parameters
    ----------
    x : array with ``ndim == 1``
        Values placed along the time dimension.
    start_time : str-convertible
        First date of the daily time axis.
    variable_name : str
        Name of the single data variable.
    lon, lat : scalar
        Values of the length-one "lon" and "lat" coordinates.

    Returns
    -------
    xr.Dataset
        One variable with dims ("time", "lon", "lat") and a "units" attribute of "K".

    Raises
    ------
    ValueError
        If ``x`` is not one-dimensional.
    """
    if x.ndim != 1:
        raise ValueError("'x' needs dim of one")

    time = xr.cftime_range(
        start=str(start_time), freq="D", periods=len(x), calendar="standard"
    )
    data_vars = {
        variable_name: (["time", "lon", "lat"], x[:, np.newaxis, np.newaxis])
    }
    coords = {
        "index": time,
        "time": time,
        "lon": (["lon"], [lon]),
        "lat": (["lat"], [lat]),
    }
    out = xr.Dataset(data_vars, coords=coords)
    # need to set variable units to pass xclim 0.29 check on units
    out[variable_name].attrs["units"] = "K"
    return out
def preprocess(ds, varkeep):
    """Correct the time dimension, align latitude with a reference grid, and drop unwanted variables.

    Parameters
    ----------
    ds : dataset with ``time`` and ``lat`` coordinates
        Dataset to clean up.
    varkeep : container of names
        Variables to keep; every other variable is dropped.

    Returns
    -------
    The dataset with corrected coordinates and only the kept variables.
    """
    # Correct time dimension to start in Jan rather than Feb
    first_stamp = ds.time.values[0]
    if first_stamp.month != 1:
        startyr = str(first_stamp.year)
        correctedtime = xr.cftime_range(
            start=startyr, end="2005-12-31", freq="MS", calendar="noleap"
        )
        ds = ds.assign_coords(time=correctedtime)
        new_times = ds.time.values
        print("\tCorrected Start to: %s; End to: %s" % (str(new_times[0]), str(new_times[-1])))

    # Load the reference latitude to accommodate different indexing
    from scipy.io import loadmat

    reference = loadmat("/home/glliu/01_Data/CESM1_LATLON.mat")
    lat = np.squeeze(reference['LAT'])
    mismatch = ~(lat == ds.lat.values)
    if np.any(mismatch):
        ds = ds.assign_coords(lat=lat)
        print("\tReassigning latitude values ")

    # Drop every variable that is not listed in "varkeep"
    varrem = [name for name in list(ds.variables) if name not in varkeep]
    return ds.drop(varrem)
def test_cftimeindex_periods_repr(periods):
    """The repr of a cftime index must report its length."""
    index = xr.cftime_range(start="2000", periods=periods)
    length_fragment = f" length={periods}"
    assert length_fragment in repr(index)
def test_cftimeindex_freq_in_repr(freq, calendar):
    """The repr of a cftime index must report its frequency."""
    index = xr.cftime_range(
        start="2000", periods=3, freq=freq, calendar=calendar
    )
    freq_fragment = f", freq='{freq}'"
    assert freq_fragment in repr(index)
def test_3641():
    """Interpolating a 500-year-step cftime series onto a string date should run without raising."""
    sparse_times = xr.cftime_range("0001", periods=3, freq="500Y")
    da = xr.DataArray(range(3), dims=["time"], coords=[sparse_times])
    # No assertion: the call itself is the check.
    da.interp(time=["0002-05-01"])
def test_cftime_datetime_sub_cftimeindex(calendar):
    """A single cftime date minus a cftime index should be a pandas TimedeltaIndex."""
    index = xr.cftime_range('2000', periods=5, calendar=calendar)
    first = index[0]
    # first - index[k] is expected to be minus k days.
    expected = pd.TimedeltaIndex(
        [timedelta(days=-offset) for offset in range(5)]
    )

    result = first - index

    assert result.equals(expected)
    assert isinstance(result, pd.TimedeltaIndex)
def test_to_datetimeindex_out_of_range(calendar):
    """Converting an index starting in year 0001 must raise a ValueError mentioning '0001'."""
    year_one_index = xr.cftime_range('0001', periods=5, calendar=calendar)
    with pytest.raises(ValueError, match='0001'):
        year_one_index.to_datetimeindex()
def test_multiindex():
    """Partial-string lookup on a MultiIndex built from a 360_day cftime index."""
    index = xr.cftime_range('2001-01-01', periods=100, calendar='360_day')
    mindex = pd.MultiIndex.from_arrays([index])

    january = mindex.get_loc('2001-01')

    # January is expected to span the first 30 entries.
    assert january == slice(0, 30)
def test_cftimeindex_sub_not_implemented(calendar):
    """Subtracting a plain integer from a cftime index must raise TypeError."""
    index = xr.cftime_range("2000", periods=5, calendar=calendar)
    with pytest.raises(TypeError, match="unsupported operand"):
        index - 1
def cftime_index(time_range_kwargs):
    """Return the result of ``xr.cftime_range`` called with the given keyword arguments."""
    index = xr.cftime_range(**time_range_kwargs)
    return index
def test_rounding_methods_against_datetimeindex(freq, method):
    """A rounding method on a cftime index must agree with the same method on a DatetimeIndex."""
    range_kwargs = dict(periods=10, freq="1777S")
    start = "2000-01-02T01:03:51"

    pandas_index = pd.date_range(start, **range_kwargs)
    expected = getattr(pandas_index, method)(freq)

    cftime_index = xr.cftime_range(start, **range_kwargs)
    rounded = getattr(cftime_index, method)(freq)
    result = rounded.to_datetimeindex()

    assert result.equals(expected)
def test_cftime_datetime_sub_cftimeindex(calendar):
    """A single cftime date minus a cftime index should be a pandas TimedeltaIndex."""
    index = xr.cftime_range("2000", periods=5, calendar=calendar)
    first = index[0]
    # first - index[k] is expected to be minus k days.
    expected = pd.TimedeltaIndex(
        [timedelta(days=-offset) for offset in range(5)]
    )

    result = first - index

    assert result.equals(expected)
    assert isinstance(result, pd.TimedeltaIndex)
def test_to_datetimeindex_feb_29(calendar):
    """Converting an index that runs past 2001-02-28 must raise a ValueError mentioning '29'."""
    end_of_february = xr.cftime_range('2001-02-28', periods=2, calendar=calendar)
    with pytest.raises(ValueError, match='29'):
        end_of_february.to_datetimeindex()
def test_cftimeindex_repr_formatting(periods, expected):
    """Test that cftimeindex.__repr__ is formatted similar to pd.Index.__repr__."""
    index = xr.cftime_range(start="2000", periods=periods, freq="D")
    # The expected text arrives indented; strip the common leading whitespace.
    expected_repr = dedent(expected)
    assert expected_repr == repr(index)
def test_cftime_index_unchanged():
    """Tests that a CFTime index going through convert time is unchanged."""
    inits = xr.cftime_range("1990", "2000", freq="Y", calendar="noleap")
    random_values = np.random.rand(len(inits))
    da = xr.DataArray(random_values, dims="init", coords=[inits])

    converted = convert_time_index(da, "init", "")

    assert_allclose(converted.init, da.init)
def test_cftime_datetime_mean_dask_error():
    """``mean`` on a chunked cftime-valued array must raise NotImplementedError."""
    times = cftime_range('2000', periods=4)
    chunked = DataArray(times, dims=['time']).chunk()
    with pytest.raises(NotImplementedError):
        chunked.mean()
def test_cftimeindex_shift_invalid_freq():
    """``shift`` with an integer in place of the frequency must raise TypeError."""
    index = xr.cftime_range('2000', periods=3)
    with pytest.raises(TypeError):
        index.shift(1, 1)
def test_rounding_methods_invalid_freq(method):
    """Rounding methods called with the "MS" frequency must raise a ValueError mentioning 'fixed'."""
    index = xr.cftime_range("2000-01-02T01:03:51", periods=10, freq="1777S")
    rounding = getattr(index, method)
    with pytest.raises(ValueError, match="fixed"):
        rounding("MS")
def test_cftime_to_non_cftime_error():
    """Interpolating a cftime-indexed array onto a plain float must raise TypeError."""
    daily = xr.cftime_range('2000', periods=24, freq='D')
    values = np.arange(24)
    da = xr.DataArray(values, coords=[daily], dims='time')

    with pytest.raises(TypeError):
        da.interp(time=0.5)
def test_distant_cftime_datetime_sub_cftimeindex(calendar):
    """Subtracting a year-2000 index from a year-1 date must raise a 'difference exceeds' ValueError."""
    index = xr.cftime_range("2000", periods=5, calendar=calendar)
    with pytest.raises(ValueError, match="difference exceeds"):
        index.date_type(1, 1, 1) - index
def missing_any(da, freq, **indexer):
    r"""Return a boolean DataArray indicating whether there are missing days in the resampled array.

    Parameters
    ----------
    da : DataArray
      Input array at daily frequency.
    freq : str
      Resampling frequency.
    **indexer : {dim: indexer, }, optional
      Time attribute and values over which to subset the array. For example, use season='DJF' to select winter values,
      month=1 to select January, or month=[6,7,8] to select summer months. If not indexer is given, all values are
      considered.

    Returns
    -------
    out : DataArray
      A boolean array set to True if any month or year has missing values.

    Raises
    ------
    ValueError
      If the selection defined by `indexer` leaves no data.
    """
    from . import generic

    # Only the base frequency (the part before any "-ANCHOR" suffix) is needed
    # to decide whether the resampled labels mark period starts or period ends.
    pfreq = freq.split("-")[0]

    # Compute the number of non-null values in the time series during each
    # period at the given frequency.
    selected = generic.select_time(da, **indexer)
    if selected.time.size == 0:
        raise ValueError("No data for selected period.")

    c = selected.notnull().resample(time=freq).sum(dim="time")

    # Frequencies ending in "S" are treated as start-anchored: the labels are
    # taken as period starts and shifted forward one period to get the ends.
    # Otherwise the labels are taken as period ends and shifted backward.
    if pfreq.endswith("S"):
        start_time = c.indexes["time"]
        end_time = start_time.shift(1, freq=freq)
    else:
        end_time = c.indexes["time"]
        start_time = end_time.shift(-1, freq=freq)

    if indexer:
        # Create a full synthetic time series and compare the number of days
        # with the original series.
        t0 = str(start_time[0].date())
        t1 = str(end_time[-1].date())
        if isinstance(c.indexes["time"], xr.CFTimeIndex):
            cal = da.time.encoding.get("calendar")
            t = xr.cftime_range(t0, t1, freq="D", calendar=cal)
        else:
            t = pd.date_range(t0, t1, freq="D")

        # The reference series must be fully non-null. np.ones is used instead
        # of np.empty because uninitialised memory may contain NaN bit
        # patterns, which notnull() would count as missing days.
        sda = xr.DataArray(data=np.ones(len(t)), coords={"time": t}, dims=("time", ))
        st = generic.select_time(sda, **indexer)
        sn = st.notnull().resample(time=freq).sum(dim="time")
        return sn != c

    # Without an indexer, simply use the start and end dates to find the
    # expected number of days in each period.
    n = (end_time - start_time).days
    nda = xr.DataArray(n.values, coords={"time": c.time}, dims="time")
    return c != nda
def test_cftimeindex_shift_invalid_n():
    """``shift`` with a string in place of the step count must raise TypeError."""
    index = xr.cftime_range("2000", periods=3)
    with pytest.raises(TypeError):
        index.shift("a", "D")
def test_cftimeindex_repr_101_shorter(periods):
    """The repr of a 101-element index must be shorter than the repr for ``periods`` elements."""
    repr_101 = repr(xr.cftime_range(start="2000", periods=101))
    repr_periods = repr(xr.cftime_range(start="2000", periods=periods))
    assert len(repr_101) < len(repr_periods)
def test_infer_freq(freq, calendar):
    """``xr.infer_freq`` must recover the frequency a cftime index was built with."""
    index = xr.cftime_range(
        "2000-01-01", periods=3, freq=freq, calendar=calendar
    )
    inferred = xr.infer_freq(index)
    assert inferred == freq
def test_cftimeindex_calendar_property(calendar, expected):
    """The ``calendar`` attribute of a cftime index must equal the expected name."""
    index = xr.cftime_range(start="2000", periods=3, calendar=calendar)
    reported = index.calendar
    assert reported == expected
def test_cftime_to_non_cftime_error():
    """Interpolating a cftime-indexed array onto a plain float must raise TypeError."""
    daily = xr.cftime_range("2000", periods=24, freq="D")
    values = np.arange(24)
    da = xr.DataArray(values, coords=[daily], dims="time")

    with pytest.raises(TypeError):
        da.interp(time=0.5)
def test_cftimeindex_calendar_repr(calendar, expected):
    """Test that cftimeindex has calendar property in repr."""
    index = xr.cftime_range(start="2000", periods=3, calendar=calendar)
    text = repr(index)
    calendar_fragment = f" calendar='{expected}'"
    assert calendar_fragment in text
    # The first two dates must also be printed in full.
    assert "2000-01-01 00:00:00, 2000-01-02 00:00:00" in text
def make_initialized_control(self):
    """Generates initialized ensembles and a control simulation, mimicking a
    perfect-model experiment.

    Populates ``self.ds`` (dims member, init, lead, lon, lat) and
    ``self.control`` (dims time, lon, lat) with a "var" variable filled by
    ``randn``, and stores the size parameters on ``self``.
    """
    self.ds = xr.Dataset()
    self.control = xr.Dataset()
    self.nmember, self.ninit, self.nlead = 5, 6, 10
    self.iterations = ITERATIONS
    self.nx, self.ny = 72, 36
    self.control_start, self.control_end = 3000, 3300
    self.ntime = self.control_end - self.control_start
    self.client = None

    FRAC_NAN = 0.0
    var_attrs = {"units": "var units", "description": "a description"}
    start = str(self.control_start)

    # Time axes: yearly steps for the control, one init every ten years.
    times = xr.cftime_range(
        start=start, periods=self.ntime, freq="YS", calendar="noleap"
    )
    inits = xr.cftime_range(
        start=start, periods=self.ninit, freq="10YS", calendar="noleap"
    )
    leads = np.arange(1, 1 + self.nlead)
    members = np.arange(1, 1 + self.nmember)

    # Spatial coordinates.
    lons = xr.DataArray(
        np.linspace(0.5, 359.5, self.nx),
        dims=("lon", ),
        attrs={"units": "degrees east", "long_name": "longitude"},
    )
    lats = xr.DataArray(
        np.linspace(-89.5, 89.5, self.ny),
        dims=("lat", ),
        attrs={"units": "degrees north", "long_name": "latitude"},
    )

    # The initialized ensemble is drawn before the control run, as originally.
    ensemble_shape = (self.nmember, self.ninit, self.nlead, self.nx, self.ny)
    self.ds["var"] = xr.DataArray(
        randn(ensemble_shape, frac_nan=FRAC_NAN),
        coords={
            "member": members,
            "init": inits,
            "lon": lons,
            "lat": lats,
            "lead": leads,
        },
        dims=("member", "init", "lead", "lon", "lat"),
        name="var",
        attrs=dict(var_attrs),
    )

    control_shape = (self.ntime, self.nx, self.ny)
    self.control["var"] = xr.DataArray(
        randn(control_shape, frac_nan=FRAC_NAN),
        coords={"lon": lons, "lat": lats, "time": times},
        dims=("time", "lon", "lat"),
        name="var",
        attrs=dict(var_attrs),
    )

    self.ds.attrs = {"history": "created for xarray benchmarking"}
    self.ds.lead.attrs["units"] = "years"
    self.control.time.attrs["units"] = "years"