def test_combine_by_coords_distant_cftime_dates():
    """Regression test for https://github.com/pydata/xarray/issues/3535.

    Three single-step datasets whose cftime ``time`` coordinates lie
    centuries apart are combined and compared against the concatenated result.
    """
    import cftime

    years = (4500, 4600, 5100)
    time_axes = [[cftime.DatetimeGregorian(year, 12, 31)] for year in years]

    # One dataset per time axis, holding the values 0, 1 and 2 respectively.
    datasets = [
        DataArray([value], dims=["time"], coords=[axis], name="a").to_dataset()
        for value, axis in enumerate(time_axes)
    ]

    result = combine_by_coords(datasets)

    expected_time = np.concatenate(time_axes)
    expected = DataArray(
        [0, 1, 2], dims=["time"], coords=[expected_time], name="a"
    ).to_dataset()
    assert_identical(result, expected)
def ensure_cftime_array(time: Sequence):
    """Convert an input 1D array to an array of cftime objects.

    Python's datetime are converted to cftime.DatetimeGregorian.

    Parameters
    ----------
    time : Sequence
        An ``xr.DataArray`` (its ``"time"`` index is used), a ``np.ndarray``
        or any other indexable sequence of datetime-like objects.

    Returns
    -------
    The input sequence itself (or its index) when it already holds cftime
    objects, otherwise a ``np.ndarray`` of ``cftime.DatetimeGregorian``.

    Raises
    ------
    ValueError
        When the input is empty or its elements cannot be cast.
    """
    if isinstance(time, xr.DataArray):
        time = time.indexes["time"]
    elif isinstance(time, np.ndarray):
        # pd.DatetimeIndex cannot parse cftime objects, so an array that
        # already holds them is returned as-is instead of being converted.
        if time.size and isinstance(time.flat[0], cftime.datetime):
            return time
        time = pd.DatetimeIndex(time)

    # Without this guard the element checks below would leak an IndexError,
    # whereas the documented failure mode of this function is ValueError.
    if len(time) == 0:
        raise ValueError("Unable to cast array to cftime dtype")

    first = time[0]
    if isinstance(first, cftime.datetime):
        return time
    if isinstance(first, pydt.datetime):
        return np.array(
            [cftime.DatetimeGregorian(*ele.timetuple()[:6]) for ele in time])
    raise ValueError("Unable to cast array to cftime dtype")
def test_combine_by_coords_raises_for_differing_calendars():
    """Combining datasets with different cftime calendars raises TypeError.

    Previously failed with an uninformative StopIteration instead of TypeError,
    see https://github.com/pydata/xarray/issues/4495
    """
    import cftime

    def as_dataset(value, date):
        # Single-step dataset named "a" with one cftime date on "time".
        return DataArray(
            [value], dims=["time"], coords=[[date]], name="a"
        ).to_dataset()

    gregorian = as_dataset(0, cftime.DatetimeGregorian(2000, 1, 1))
    proleptic = as_dataset(1, cftime.DatetimeProlepticGregorian(2001, 1, 1))

    expected_message = r"cannot compare .* \(different calendars\)"
    with raises_regex(TypeError, expected_message):
        combine_by_coords([gregorian, proleptic])
def return_ibtracs_storm(fname, storm_idx, variables):
    """Extract the requested variables of one storm with fill values removed.

    Parameters
    ----------
    fname
        Path passed to ``xr.open_dataset`` (opened with
        ``mask_and_scale=False``).
    storm_idx
        Value used to select along the ``storm`` coordinate.
    variables
        Iterable of variable names to read from the dataset.

    Returns
    -------
    dict
        Maps each variable name to a numpy array holding only the entries
        that differ from the variable's fill value.
    """
    d = {}
    # The context manager closes the file once all values have been read;
    # the boolean-mask indexing below copies the data into memory first.
    with xr.open_dataset(fname, mask_and_scale=False) as ibnc:
        nc = ibnc.sel(storm=storm_idx)

        # remove fill values and append data to dictionary
        for v in variables:
            vv = nc[v]
            if v == 'time':
                # NOTE(review): presumably the decoded form of the time fill
                # value - confirm against the file's metadata.
                fv = cftime.DatetimeGregorian(-25518, 1, 28, 0, 0, 0, 0)
            else:
                fv = vv._FillValue
            data = vv.values[vv != fv]
            if v == 'landfall':
                # there is always one less landfall value, replace with last value
                data = np.append(data, data[-1])
            d[v] = data
    return d
def test_combine_by_coords_raises_for_differing_calendars():
    """Combining datasets with different cftime calendars raises TypeError.

    Previously failed with an uninformative StopIteration instead of TypeError,
    see https://github.com/pydata/xarray/issues/4495
    """
    import cftime

    def as_dataset(value, date):
        # Single-step dataset named "a" with one cftime date on "time".
        return DataArray(
            [value], dims=["time"], coords=[[date]], name="a"
        ).to_dataset()

    gregorian = as_dataset(0, cftime.DatetimeGregorian(2000, 1, 1))
    proleptic = as_dataset(1, cftime.DatetimeProlepticGregorian(2001, 1, 1))

    # The expected message depends on the installed cftime version.
    recent_cftime = LooseVersion(cftime.__version__) >= LooseVersion("1.5")
    error_msg = (
        "Cannot combine along dimension 'time' with mixed types."
        if recent_cftime
        else r"cannot compare .* \(different calendars\)"
    )

    with pytest.raises(TypeError, match=error_msg):
        combine_by_coords([gregorian, proleptic])
def stack_by_init_date( ds, init_dates, n_lead_steps, time_dim="time", init_dim="init_date", lead_dim="lead_time", time_rounding="D", ): """Stack timeseries array in inital date / lead time format. Parameters ---------- ds : xarray DataArray or Dataset Input array containing a time dimension period : list List of initial dates of the same object type as the times in the time dimension of ds n_lead_steps: int Maximum number of lead time steps time_dim: str, default 'time' Name of the time dimension in ds init_dim: str, default 'init_date' Name of the initial date dimension to create in the output lead_dim: str, default 'lead_time' Name of the lead time dimension to create in the output time_rounding : {'A', 'M', 'D'}, default 'D' Match time axis and init dates by floor rounding to nearest day, month, or year Returns ------- stacked : xarray DataArray or Dataset Array with data stacked by specified initial dates and lead steps Notes ----- Only initial dates that fall within the time range of the input timeseries are retained. Thus, inital dates prior to the time range of the input timeseries that include data at longer lead times are not included in the output dataset. To include these data, prepend the input timeseries with nans so that the initial dates in question are present in the time dimension of the input timeseries. 
""" # Only keep init dates that fall within available times times = ds[time_dim] init_dates = init_dates[ np.logical_and(init_dates >= times.min(), init_dates <= times.max()) ] # Initialise indexes of specified inital dates and time info for each initial date time2d = np.empty((len(init_dates), n_lead_steps), "object") time2d[:] = cftime.DatetimeGregorian( 3000, 1, 1 ) # Year 3000 where data do not exist init_date_indexes = [] for ndate, init_date in enumerate(init_dates): start_index = _get_match_index(times, init_date.item(), time_rounding) end_index = start_index + n_lead_steps time_slice = ds[time_dim][start_index:end_index] time2d[ndate, : len(time_slice)] = time_slice init_date_indexes.append(start_index) # Use `rolling` to stack timeseries like forecasts # Note, rolling references each window to the RH edge of the window. Hence we reverse the timeseries # so that each window starts at the specified initial date and includes n_lead_steps to the right of # that element ds = ds.copy().sel({time_dim: slice(None, None, -1)}) init_date_indexes = [ds.sizes[time_dim] - 1 - i for i in init_date_indexes] ds = ds.rolling({time_dim: n_lead_steps}, min_periods=1).construct( lead_dim, keep_attrs=True ) ds = ds.isel({time_dim: init_date_indexes}) # Account for reversal of timeseries ds = ds.sel({lead_dim: slice(None, None, -1)}) ds = ds.rename({time_dim: init_dim}) ds = ds.assign_coords({lead_dim: ds[lead_dim].values}) ds = ds.assign_coords({time_dim: ([init_dim, lead_dim], time2d)}) ds = ds.assign_coords({init_dim: init_dates.values}) return ds
def main(f, years):
    """Plot storm tracks and tally storms that come close to land.

    Reads the dataset ``f`` and the file ``summary_1970-2019.csv`` located in
    the same directory, draws every storm listed in the summary's ``findex``
    column on two maps ("all" and "major") and saves both maps as PNG files
    next to ``f``.

    Parameters
    ----------
    f
        Path of the dataset opened with ``xr.open_dataset``.
    years
        Indexable pair; the per-year counters are initialised for
        ``years[0]`` through ``years[1]`` inclusive.

    Notes
    -----
    * The per-year counters ``storms_all`` / ``storms_major`` are filled in
      but only consumed by the commented-out ``export_df`` calls.
    * A qualifying storm whose first valid time lies outside ``years`` raises
      ``KeyError`` when its counter is incremented.
    * The opened dataset is not explicitly closed.
    """
    sDir = os.path.dirname(f)
    ncfile = xr.open_dataset(f, mask_and_scale=False)
    yrs = np.arange(years[0], years[1] + 1, 1)
    sf = pd.read_csv(os.path.join(sDir, 'summary_1970-2019.csv'))
    hindex = list(sf['findex'])

    # yearly counters, one entry per year in the requested range
    storms_all = dict()
    storms_major = dict()
    for yr in yrs:
        storms_all[yr] = 0
        storms_major[yr] = 0

    # fig_all, ax_all = plt.subplots(subplot_kw=dict(projection=ccrs.PlateCarree()))
    # fig_major, ax_major = plt.subplots(subplot_kw=dict(projection=ccrs.PlateCarree()))
    fig_all, ax_all = plt.subplots(subplot_kw=dict(projection=ccrs.Robinson()))
    fig_major, ax_major = plt.subplots(subplot_kw=dict(projection=ccrs.Robinson()))
    ax_lims = [-120, 0, 0, 55]

    for i, hi in enumerate(hindex):
        # set up map axes
        if i == 0:
            add_map_features(ax_all, ax_lims)
            add_map_features(ax_major, ax_lims)

        ncf = ncfile.sel(storm=hi)
        # -9999 is treated as the fill value and replaced by NaN in place
        lat = ncf.lat.values
        lat[lat == -9999] = np.nan
        lon = ncf.lon.values
        lon[lon == -9999] = np.nan
        # NOTE(review): usa_sshs is presumably the Saffir-Simpson category - confirm
        category = np.nanmax(ncf.usa_sshs.values)

        # distance from land is < 60 nmile (111 km)
        lf = ncf.landfall.values.astype('float')
        lf[lf == -9999] = np.nan  # convert fill values to nan
        minlf = np.nanmin(lf)
        lf_ind = np.where(lf < 111)[0]
        lf_lon = lon[lf_ind]

        # choose when landfall is < 60 nmile and the storm is west of 40 degrees W
        #if np.logical_and(minlf < 111, any(lf_lon < -60)):
        if np.logical_and(minlf < 111, any(lf_lon < -40)):
            nsamerica_lf = 'yes'
        else:
            nsamerica_lf = 'no'

        # shared line style; `bc`/`alpha` style the tracks that do not qualify
        lw = 1
        bc = 'darkgray'
        alpha = .6
        mk = 'None'

        # count the storms that make landfall west of 40 degrees W each year
        if np.logical_and(category >= 0, nsamerica_lf == 'yes'):
            # earliest time after 1800-01-01; presumably this filters out
            # fill-value times (TODO confirm)
            t0 = min(t for t in ncf.time.values if t > cftime.DatetimeGregorian(1800, 1, 1, 0, 0, 0, 0))
            storms_all[t0.year] = storms_all[t0.year] + 1
            ax_all.plot(lon, lat, c='r', marker=mk, linewidth=lw, transform=ccrs.PlateCarree())
            if category >= 3:
                storms_major[t0.year] = storms_major[t0.year] + 1
                ax_major.plot(lon, lat, c='r', marker=mk, linewidth=lw, transform=ccrs.PlateCarree())
            else:
                ax_major.plot(lon, lat, c=bc, marker=mk, linewidth=lw, alpha=alpha, transform=ccrs.PlateCarree())
        else:
            ax_all.plot(lon, lat, c=bc, marker=mk, linewidth=lw, alpha=alpha, transform=ccrs.PlateCarree())
            ax_major.plot(lon, lat, c=bc, marker=mk, linewidth=lw, alpha=alpha, transform=ccrs.PlateCarree())

    # export_df(storms_all, os.path.join(sDir, 'NA_landfalling_storms_all_1970-2019-test.csv'))
    # export_df(storms_major, os.path.join(sDir, 'NA_landfalling_storms_major_1970-2019-test.csv'))

    fig_all.savefig(os.path.join(sDir, 'NA_storms_all_1970-2019-test40deg.png'), dpi=300)
    plt.close(fig_all)
    fig_major.savefig(os.path.join(sDir, 'NA_storms_major_1970-2019-test40deg.png'), dpi=300)
    plt.close(fig_major)
def generate_range(
    start: cftime.datetime,
    end: cftime.datetime,
    offset: cftime_offsets.BaseCFTimeOffset,
) -> Iterable[cftime.datetime]:
    """
    Build the datetimes between ``start`` and ``end`` in steps of ``offset``.

    Both ends of the span are extended to the next valid timestep: ``start``
    is rolled back and ``end`` is rolled forward onto the offset, see the
    examples.

    Parameters
    ----------
    start: :class:`cftime.datetime`
        Starting datetime from which to generate the range (rolled backward
        onto the offset). Only its ``timetuple()`` is read.

    end: :class:`cftime.datetime`
        Last datetime from which to generate the range (rolled forward onto
        the offset). Only its ``timetuple()`` is read.

    offset:
        Offset object for determining the timesteps.

    Returns
    -------
    list of :class:`cftime.datetime`
        Every datetime in the range, in the order produced by
        ``cftime_offsets.cftime_range``.

    Raises
    ------
    ValueError
        Documented for offsets that do not result in increasing
        :class:`cftime.datetime`'s; not raised directly by this function,
        presumably by the underlying range generation.

    Examples
    --------
    The range is extended at either end to the nearest timestep. In the
    example below, the first timestep is rolled back to 1st Jan 2001 whilst
    the last is extended to 1st Jan 2006.

    >>> import datetime as dt
    >>> from pprint import pprint
    >>> from scmdata.offsets import to_offset, generate_range
    >>> g = generate_range(
    ...     dt.datetime(2001, 4, 1),
    ...     dt.datetime(2005, 6, 3),
    ...     to_offset("AS"),
    ... )

    >>> pprint([d for d in g])
    [cftime.datetime(2001, 1, 1, 0, 0),
     cftime.datetime(2002, 1, 1, 0, 0),
     cftime.datetime(2003, 1, 1, 0, 0),
     cftime.datetime(2004, 1, 1, 0, 0),
     cftime.datetime(2005, 1, 1, 0, 0),
     cftime.datetime(2006, 1, 1, 0, 0)]

    In this example the first timestep is rolled back to 31st Dec 2000 whilst
    the last is extended to 31st Dec 2005.

    >>> g = generate_range(
    ...     dt.datetime(2001, 4, 1),
    ...     dt.datetime(2005, 6, 3),
    ...     to_offset("A"),
    ... )
    >>> pprint([d for d in g])
    [cftime.datetime(2000, 12, 31, 0, 0),
     cftime.datetime(2001, 12, 31, 0, 0),
     cftime.datetime(2002, 12, 31, 0, 0),
     cftime.datetime(2003, 12, 31, 0, 0),
     cftime.datetime(2004, 12, 31, 0, 0),
     cftime.datetime(2005, 12, 31, 0, 0)]

    In this example the first timestep is already on the offset so stays
    there, the last timestep is extended to 1st Jul 2005.

    >>> g = generate_range(
    ...     dt.datetime(2001, 4, 1),
    ...     dt.datetime(2005, 6, 3),
    ...     to_offset("QS"),
    ... )
    >>> pprint([d for d in g])
    [cftime.datetime(2001, 4, 1, 0, 0),
     cftime.datetime(2001, 7, 1, 0, 0),
     cftime.datetime(2001, 10, 1, 0, 0),
     cftime.datetime(2002, 1, 1, 0, 0),
     cftime.datetime(2002, 4, 1, 0, 0),
     cftime.datetime(2002, 7, 1, 0, 0),
     cftime.datetime(2002, 10, 1, 0, 0),
     cftime.datetime(2003, 1, 1, 0, 0),
     cftime.datetime(2003, 4, 1, 0, 0),
     cftime.datetime(2003, 7, 1, 0, 0),
     cftime.datetime(2003, 10, 1, 0, 0),
     cftime.datetime(2004, 1, 1, 0, 0),
     cftime.datetime(2004, 4, 1, 0, 0),
     cftime.datetime(2004, 7, 1, 0, 0),
     cftime.datetime(2004, 10, 1, 0, 0),
     cftime.datetime(2005, 1, 1, 0, 0),
     cftime.datetime(2005, 4, 1, 0, 0),
     cftime.datetime(2005, 7, 1, 0, 0)]
    """

    def _as_gregorian(moment):
        # Keep year..second only; the Gregorian calendar allows
        # adding/subtracting datetime.timedelta in the range calculation.
        return cftime.DatetimeGregorian(*moment.timetuple()[:6])

    gregorian_start = _as_gregorian(start)
    gregorian_end = _as_gregorian(end)

    first_step = offset.rollback(gregorian_start)
    last_step = offset.rollforward(gregorian_end)
    steps = cftime_offsets.cftime_range(first_step, last_step, freq=offset)

    # Hand back calendar-agnostic cftime.datetime objects.
    converted = []
    for step in steps:
        converted.append(cftime.datetime(*step.timetuple()[:6]))
    return converted
def main():
    """Command-line entry point: download indicators and export them as CSV.

    Reads ``locations.yml``, ``indicators.yml``, ``assets.yml`` and
    ``cmip6.yml`` from the current directory, parses the command line and,
    for every requested indicator:

    1. builds one ERA5 variable and one CMIP6 variable per (model, experiment),
    2. downloads them (serially when ``--max-workers`` is below 2),
    3. writes one CSV time series per variable, bias-correcting CMIP6 series
       against ERA5 when enabled and when the indicator defines a
       ``bias-correction`` method,
    4. with ``--ensemble``, writes one CSV per experiment holding all models
       plus ``median``, ``lower`` and ``upper`` columns,
    5. optionally draws/saves time-series and regional map figures.

    Exits through ``parser.error`` when neither a location nor a lon/lat pair
    is given, when no indicator/asset is given, or when an asset refers to an
    unknown indicator.
    """
    import argparse

    # Context managers make sure the configuration files are closed again.
    with open('locations.yml') as stream:
        locations = yaml.safe_load(stream)
    with open('indicators.yml') as stream:
        variables_def = yaml.safe_load(stream)
    with open('assets.yml') as stream:
        assets = yaml.safe_load(stream)
    with open('cmip6.yml') as stream:
        cmip6_yml = yaml.safe_load(stream)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--max-workers",
        type=int,
        default=4,
        help=
        "Number of parallel threads for data download. Hint: use `--max-workers 1` for serial downlaod."
    )

    # exactly one of --indicators / --asset must be given
    g = parser.add_mutually_exclusive_group(required=True)
    g.add_argument('--indicators',
                   nargs='*',
                   default=[],
                   choices=[vdef['name'] for vdef in variables_def],
                   help='list of custom indicators to download')
    g.add_argument(
        '--asset',
        choices=list(assets.keys()),
        help=
        'pre-defined list of variables, defined in assets.yml (experimental)')

    parser.add_argument(
        '--dataset',
        choices=['era5', 'cmip6'],
        help='dataset in combination with for `--indicators` and `--asset`')
    parser.add_argument('-o',
                        '--output',
                        default='indicators',
                        help='output directory, default: %(default)s')
    parser.add_argument('--overwrite',
                        action='store_true',
                        help=argparse.SUPPRESS)

    g = parser.add_argument_group('location')
    g.add_argument('--location',
                   choices=[loc['name'] for loc in locations],
                   help='location name defined in locations.yml')
    g.add_argument('--lon', type=float)
    g.add_argument('--lat', type=float)

    g = parser.add_argument_group('area size controls')
    g.add_argument(
        '--area',
        nargs=4,
        type=float,
        help='area as four numbers: top, left, bottom, right (CDS convention)')
    g.add_argument(
        '--width-km',
        type=float,
        default=1000,
        help=
        "Width (km) around the selected location, when not provided by `area`. %(default)s km by default."
    )
    g.add_argument(
        '--view',
        nargs=4,
        type=float,
        help=
        'area for plot as four numbers: top, left, bottom, right (CDS convention)'
    )

    g = parser.add_argument_group('ERA5 control')
    g.add_argument('--year',
                   nargs='+',
                   default=list(range(1979, 2019 + 1)),
                   help=argparse.SUPPRESS)

    g = parser.add_argument_group('CMIP6 control')
    g.add_argument('--model',
                   nargs='*',
                   default=None,
                   choices=get_all_models())
    g.add_argument('--experiment',
                   nargs='*',
                   choices=cmip6_yml["experiments"],
                   default=['ssp5_8_5'])
    # historical extension and bias correction are on by default; only the
    # --no-* switches change anything
    g.add_argument('--historical',
                   action='store_true',
                   default=True,
                   help=argparse.SUPPRESS)
    g.add_argument('--no-historical',
                   action='store_false',
                   dest='historical',
                   help=argparse.SUPPRESS)
    g.add_argument('--bias-correction',
                   action='store_true',
                   default=True,
                   help=argparse.SUPPRESS)
    g.add_argument('--no-bias-correction',
                   action='store_false',
                   dest='bias_correction',
                   help='suppress bias-correction for CMIP6 data')
    g.add_argument(
        '--reference-period',
        default=[1979, 2019],
        nargs=2,
        type=int,
        help='reference period for bias correction (default: %(default)s)')
    g.add_argument('--yearly-bias',
                   action='store_true',
                   help='yearly instead of monthly bias correction')
    g.add_argument(
        '--ensemble',
        action='store_true',
        help=
        'If `--model` is not specified, default to all available models. Also write a csv file with all models as columns, as well as median, lower and upper (5th and 95th percentiles) fields.'
    )

    g = parser.add_argument_group('visualization')
    g.add_argument('--view-region', action='store_true')
    g.add_argument('--view-timeseries', action='store_true')
    g.add_argument('--png-region', action='store_true')
    g.add_argument('--png-timeseries', action='store_true')
    g.add_argument('--dpi',
                   default=100,
                   type=int,
                   help='dop-per-inches (default: %(default)s)')
    g.add_argument('--yearly-mean', action='store_true')

    o = parser.parse_args()

    # Compare against None: a longitude or latitude of exactly 0 is valid.
    has_coordinates = o.lon is not None and o.lat is not None
    if not (o.location or has_coordinates):
        parser.error(
            'please provide a location, for instance `--location Welkenraedt`, or use custom lon and lat, e.g. `--lon 5.94 --lat 50.67`'
        )
    elif o.location:
        loc = {loc['name']: loc for loc in locations}[o.location]
        o.lon, o.lat = loc['lon'], loc['lat']
        if 'area' in loc and not o.area:
            o.area = loc['area']

    if not o.area:
        o.area = make_area(o.lon, o.lat, o.width_km)

    print('lon', o.lon)
    print('lat', o.lat)

    if not o.asset and not o.indicators:
        parser.error(
            'please provide indicators, for example: `--indicators 2m_temperature` or asset, e.g. `--asset energy`'
        )

    # assets only contain indicators
    if o.asset:
        known_indicators = [v['name'] for v in variables_def]
        for vname in assets[o.asset]:
            if vname not in known_indicators:
                parser.error(
                    f'unknown indicator in assets.yml: {vname}. See indicators.yml for indicator definition'
                )
            o.indicators.append(vname)

    # folder structure for CSV results
    loc_folder = o.location.lower() if o.location else f'{o.lat}N-{o.lon}E'
    asset_folder = o.asset if o.asset else 'all'

    if o.model is None:
        if o.ensemble:
            o.model = get_all_models()
        else:
            # Must be a list: the model loop below iterates over o.model, and
            # a bare string would be iterated character by character.
            o.model = ['mpi_esm1_2_lr']

    # loop over indicators
    vdef_by_name = {v['name']: v for v in variables_def}
    for name in o.indicators:

        variables = []  # each variable for the simulation set

        vdef = vdef_by_name[name]

        indicator_def = dict(name=name,
                             units=vdef.get('units'),
                             description=vdef.get('description'),
                             scale=vdef.get('scale', 1),
                             offset=vdef.get('offset', 0))

        vdef2 = vdef.get('era5', {})
        era5_kwargs = dict(area=o.area, year=o.year)
        era5 = parse_indicator(ERA5,
                               defs=vdef2,
                               cls_kwargs=era5_kwargs,
                               **indicator_def)
        era5.simulation_set = 'ERA5'
        era5.set_folder = 'era5'
        era5.alias = name

        # ERA5 is also needed as the reference when bias-correcting CMIP6
        if not o.dataset or o.dataset == 'era5' or o.bias_correction:
            variables.append(era5)

        vdef2 = vdef.get('cmip6', {})
        # NOTE(review): `transform` is not used below; kept in case
        # constructing it validates the definition.
        transform = Transform(vdef2.get('scale', 1), vdef2.get('offset', 0))
        if not o.dataset or o.dataset == 'cmip6':
            for model in o.model:
                # e.g. "ssp5_8_5" -> "ssp5-8.5"
                labels = {
                    x: "{}-{}.{}".format(*x.split("_"))
                    for x in cmip6_yml["experiments"]
                }
                for experiment in o.experiment:
                    cmip6_kwargs = dict(model=model,
                                        experiment=experiment,
                                        historical=o.historical,
                                        area=o.area)
                    cmip6 = parse_indicator(CMIP6,
                                            defs=vdef2,
                                            cls_kwargs=cmip6_kwargs,
                                            **indicator_def)
                    cmip6.reference = era5
                    cmip6.simulation_set = f'CMIP6 - {labels.get(experiment, experiment)} - {model}'
                    cmip6.set_folder = f'cmip6-{model}-{experiment}'
                    cmip6.alias = name
                    variables.append(cmip6)

        if not variables:
            logging.warning(f'no variable for {name}')
            continue

        if o.max_workers < 2:
            variables2 = download_all_variables_serial(variables)
        else:
            variables2 = download_all_variables(variables)

        # Diagnose which variables have been excluded
        names = list(set([v.name for v in variables]))
        names2 = list(set([v.name for v in variables2]))
        models = list(
            set([
                v.datasets[0].model for v in variables
                if isinstance(v.datasets[0], CMIP6)
            ]))
        models2 = list(
            set([
                v.datasets[0].model for v in variables2
                if isinstance(v.datasets[0], CMIP6)
            ]))
        print(f"Downloaded {len(variables2)} out of {len(variables)}")
        print(f"... {len(names2)} out of {len(names)} variable types")
        print(f"... {len(models2)} out of {len(models)} models")
        print("CMIP6 models excluded:",
              " ".join([m for m in models if m not in models2]))
        print("CMIP6 models included:", " ".join(models2))
        variables = variables2

        # download and convert to csv
        for v in variables:
            folder = os.path.join(o.output, loc_folder, asset_folder,
                                  v.set_folder)
            v.csv_file = os.path.join(folder, (v.alias or v.variable) + '.csv')

            if os.path.exists(v.csv_file):
                print("Already exitst:", v.csv_file)
                continue

            series = v.load_timeseries(o.lon, o.lat, overwrite=o.overwrite)

            bias_correction_method = vdef.get('bias-correction')
            if o.bias_correction and isinstance(
                    v.datasets[0],
                    CMIP6) and bias_correction_method is not None:
                # distinct name so the ERA5 variable object is not shadowed
                reference_series = v.reference.load_timeseries(o.lon, o.lat)
                if o.yearly_bias:
                    series = correct_yearly_bias(series, reference_series,
                                                 o.reference_period,
                                                 bias_correction_method)
                else:
                    series = correct_monthly_bias(series, reference_series,
                                                  o.reference_period,
                                                  bias_correction_method)

            os.makedirs(folder, exist_ok=True)
            print("Save to", v.csv_file)
            save_csv(series, v.csv_file)

        if o.ensemble:
            ensemble_files = {}
            import cftime
            for experiment in o.experiment:
                ensemble_variables = [
                    v for v in variables if isinstance(v.datasets[0], CMIP6)
                    and v.datasets[0].experiment == experiment
                ]
                # Nothing to aggregate (e.g. `--dataset era5` or every
                # download failed): skip instead of failing on [0] below.
                if not ensemble_variables:
                    logging.warning(
                        f'no CMIP6 variable for ensemble of {experiment}')
                    continue

                # common monthly time axis (15th of each month, 1979-2100)
                dates = np.array([
                    cftime.DatetimeGregorian(y, m, 15)
                    for y in range(1979, 2100 + 1) for m in range(1, 12 + 1)
                ])
                index = pd.Index(cftime.date2num(dates, time_units),
                                 name=time_units)
                df = {}
                for v in ensemble_variables:
                    series = load_csv(v.csv_file)
                    series.index = index[:len(series)]
                    df[v.datasets[0].model] = series
                df = pd.DataFrame(df)
                # compute all statistics before adding any of them as a
                # column, so they only cover the model columns
                median = df.median(axis=1)
                lower = df.quantile(.05, axis=1)
                upper = df.quantile(.95, axis=1)
                df["median"] = median
                df["lower"] = lower
                df["upper"] = upper

                first = ensemble_variables[0]
                folder = os.path.join(
                    o.output, loc_folder, asset_folder,
                    first.set_folder.replace(first.datasets[0].model,
                                             "ensemble"))
                csv_file = os.path.join(folder, first.alias
                                        or first.name) + '.csv'
                ensemble_files[experiment] = csv_file
                os.makedirs(folder, exist_ok=True)
                print("Save to", csv_file)
                save_csv(df, csv_file)

        if o.view_region or o.view_timeseries or o.png_region or o.png_timeseries:
            import matplotlib.pyplot as plt
            try:
                import cartopy
                import cartopy.crs as ccrs
                kwargs = dict(projection=ccrs.PlateCarree())
            except ImportError:
                logging.warning('install cartopy to benefit from coastlines')
                cartopy = None
                kwargs = {}

            if o.view is None:
                o.view = o.area

            def plot_timeseries(v):
                """Plot the CSV time series of `v` on figure 2 (skipped if the PNG exists)."""
                figname = v.csv_file.replace('.csv', '.png')
                if os.path.exists(figname):
                    return

                fig2 = plt.figure(num=2)
                plt.clf()
                ax2 = fig2.add_subplot(1, 1, 1)

                ts = load_csv(v.csv_file)
                # convert units for easier reading of graphs
                ts.index = convert_time_units_series(ts.index, years=True)
                l, = ax2.plot(ts.index, ts.values, label=v.simulation_set)
                ax2.legend()
                ax2.set_xlabel(ts.index.name)
                ax2.set_ylabel(v.units)
                ax2.set_title(name)

                # add yearly mean as well
                if o.yearly_mean:
                    yearly_mean = ts.rolling(12).mean()
                    ax2.plot(ts.index[::12],
                             yearly_mean[::12],
                             alpha=1,
                             linewidth=2,
                             color=l.get_color())

                if o.png_timeseries:
                    fig2.savefig(figname, dpi=o.dpi)

            def plot_region(v):
                """Plot the time-mean map of `v` on figure 1 (skipped if the PNG exists)."""
                v0 = v.datasets[0]

                figname = v.csv_file.replace('.csv', '-region.png')
                if os.path.exists(figname):
                    return

                fig1 = plt.figure(num=1)
                plt.clf()
                ax1 = fig1.add_subplot(1, 1, 1, **kwargs)

                if isinstance(v.datasets[0], ERA5):
                    y1, y2 = o.reference_period
                    roll = False
                    title = f'ERA5: {y1}-{y2}'
                else:
                    y1, y2 = 2071, 2100
                    roll = o.view[1] < 0
                    title = f'{labels.get(v0.experiment, v0.experiment)} ({v0.model}): {y1}-{y2}'

                refslice = slice(str(y1), str(y2))
                # named `mean_map` so the builtin `map` is not shadowed
                mean_map = v.load_cube(time=refslice, area=o.view,
                                       roll=roll).mean(dim='time')

                h = ax1.imshow(mean_map.values[::-1],
                               extent=cube_area(mean_map, extent=True))
                plt.colorbar(h, ax=ax1, label=f'{name} ({v.units})')
                ax1.set_title(title)
                ax1.plot(o.lon, o.lat, 'ko')

                if cartopy:
                    ax1.coastlines(resolution='10m')

                if o.png_region:
                    fig1.savefig(figname, dpi=o.dpi)

            for v in variables:
                if o.view_timeseries or o.png_timeseries:
                    plot_timeseries(v)

                if o.view_region or o.png_region:
                    # Maps are best-effort, but only swallow real errors (not
                    # KeyboardInterrupt/SystemExit) and report the cause.
                    try:
                        plot_region(v)
                    except Exception as exc:
                        logging.warning(
                            f'failed to make map for {v.name}: {exc}')

            # all simulation sets on one figure
            def plot_all_simulations():
                """Overlay every variable (and the ensemble statistics) on figure 3."""
                figname = os.path.join(o.output, loc_folder, asset_folder,
                                       'all_' + name + '.png')
                if os.path.exists(figname):
                    return

                fig3 = plt.figure(num=3)
                plt.clf()
                ax3 = fig3.add_subplot(1, 1, 1)
                for v in variables:
                    ts = load_csv(v.csv_file)
                    ts.index = convert_time_units_series(ts.index, years=True)

                    # ERA5 is drawn in black on top of the model lines
                    if isinstance(v.datasets[0], ERA5):
                        color = 'k'
                        zorder = 5
                    else:
                        color = None
                        zorder = None

                    # add yearly mean instead of monthly mean
                    if o.yearly_mean:
                        yearly_mean = ts.rolling(12).mean()
                        x = ts.index[::12]
                        y = yearly_mean[::12]
                    else:
                        x = ts.index
                        y = ts.values

                    l, = ax3.plot(x,
                                  y,
                                  alpha=0.5 if o.ensemble else 1,
                                  label=v.simulation_set,
                                  linewidth=1 if o.ensemble else 2,
                                  color=color,
                                  zorder=zorder)

                # Add ensemble mean
                if o.ensemble:
                    for experiment in ensemble_files:
                        df = load_csv(ensemble_files[experiment])
                        df.index = convert_time_units_series(df.index,
                                                             years=True)
                        if o.yearly_mean:
                            yearly_mean = df.rolling(12).mean()
                            x = df.index[::12]
                            y = yearly_mean.iloc[::12]
                        else:
                            x = df.index
                            y = df
                        l, = ax3.plot(x,
                                      y["median"],
                                      alpha=1,
                                      label=f"{experiment} (median)",
                                      linewidth=2,
                                      zorder=4)
                        ax3.plot(x,
                                 y["lower"],
                                 linewidth=1,
                                 zorder=4,
                                 linestyle="--",
                                 color=l.get_color())
                        ax3.plot(x,
                                 y["upper"],
                                 linewidth=1,
                                 zorder=4,
                                 linestyle="--",
                                 color=l.get_color())
                        ax3.fill_between(x,
                                         y["lower"],
                                         y["upper"],
                                         alpha=0.2,
                                         zorder=-1,
                                         color=l.get_color())

                ax3.legend(fontsize='xx-small')
                # axis labels come from the last variable plotted above
                ax3.set_ylabel(v.units)
                ax3.set_xlabel(ts.index.name)
                ax3.set_title(name)
                mi, ma = ax3.get_xlim()
                if mi < 0:
                    ax3.set_xlim(
                        xmin=0)  # start at start_year (i.e. ERA5 start)

                if o.png_timeseries:
                    fig3.savefig(figname, dpi=max(o.dpi, 300))

            if o.view_timeseries or o.png_timeseries:
                plot_all_simulations()

            # NOTE(review): nesting inferred from a whitespace-mangled source;
            # the figures are shown once per indicator - confirm this was not
            # meant to run once after the indicator loop.
            if o.view_timeseries or o.view_region:
                plt.show()
import numpy as np from forest import util @pytest.mark.parametrize("given,expect", [ pytest.param('2019-10-10 01:02:34', dt.datetime(2019, 10, 10, 1, 2, 34), id="str with space"), pytest.param('2019-10-10T01:02:34', dt.datetime(2019, 10, 10, 1, 2, 34), id="iso8601"), pytest.param(np.datetime64('2019-10-10T11:22:33'), dt.datetime(2019, 10, 10, 11, 22, 33), id="datetime64"), pytest.param(cftime.DatetimeGregorian(2019, 10, 10, 11, 22, 33), dt.datetime(2019, 10, 10, 11, 22, 33), id="cftime.DatetimeGregorian"), ]) def test__to_datetime(given, expect): assert util.to_datetime(given) == expect class Test_to_datetime(unittest.TestCase): def test_datetime(self): now = dt.datetime.now() result = util.to_datetime(now) self.assertEqual(result, now) def test_unsupported(self): with self.assertRaisesRegex(Exception, 'Unknown value'):
def main():
    """Command-line entry point: download an extreme-value index and export CSVs.

    Reads ``locations.yml`` from the current directory, parses the command
    line and, for every requested experiment, downloads the indicator for
    each model in a thread pool, loads the time series at the requested
    location and writes one CSV per model plus a combined ``-all.csv`` under
    ``<output>/<location>/extremes``.

    Exits through ``parser.error`` when neither a location nor a lon/lat pair
    is given. Failed downloads are reported on stdout and skipped.
    """
    import argparse

    # The context manager makes sure the configuration file is closed again.
    with open('locations.yml') as stream:
        locations = yaml.safe_load(stream)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--max-workers",
        type=int,
        default=4,
        help=
        "Number of parallel threads for data download. Hint: use `--max-workers 1` for serial downlaod."
    )
    parser.add_argument('--indicator', required=True, choices=VARIABLES)
    parser.add_argument('-o',
                        '--output',
                        default='indicators',
                        help='output directory, default: %(default)s')
    parser.add_argument('--overwrite',
                        action='store_true',
                        help=argparse.SUPPRESS)

    g = parser.add_argument_group('location')
    g.add_argument('--location',
                   choices=[loc['name'] for loc in locations],
                   help='location name defined in locations.yml')
    g.add_argument('--lon', type=float)
    g.add_argument('--lat', type=float)

    g = parser.add_argument_group('CMIP6 control')
    g.add_argument('--model', nargs='+', default=None, choices=MODELS)
    g.add_argument(
        '--ensemble_member',
        default=None,
        help="typically `r1i1p1f1` but some models require different members")
    g.add_argument('--experiment',
                   nargs='*',
                   choices=['ssp1_2_6', 'ssp2_4_5', 'ssp3_7_0', 'ssp5_8_5'],
                   default=['ssp5_8_5'])

    o = parser.parse_args()

    # default to every known model
    if not o.model:
        o.model = MODELS

    # Compare against None: a longitude or latitude of exactly 0 is valid.
    has_coordinates = o.lon is not None and o.lat is not None
    if not (o.location or has_coordinates):
        parser.error(
            'please provide a location, for instance `--location Welkenraedt`, or use custom lon and lat, e.g. `--lon 5.94 --lat 50.67`'
        )
    elif o.location:
        loc = {loc['name']: loc for loc in locations}[o.location]
        o.lon, o.lat = loc['lon'], loc['lat']

    print('lon', o.lon)
    print('lat', o.lat)

    for experiment in o.experiment:
        variables = [
            ExtremeValueIndices(o.indicator,
                                model,
                                experiment,
                                historical=experiment != "historical",
                                ensemble=o.ensemble_member)
            for model in o.model
        ]

        # https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor-example
        downloaded_variables = []
        with concurrent.futures.ThreadPoolExecutor(
                max_workers=o.max_workers) as executor:
            # Start the downloads and remember which variable each future belongs to
            future_to_variable = {
                executor.submit(v.download): v
                for v in variables
            }
            for future in concurrent.futures.as_completed(future_to_variable):
                v = future_to_variable[future]
                try:
                    # only the success/failure of the download matters here
                    future.result()
                except Exception as exc:
                    print(f'failed to download {v} : {exc}')
                else:
                    downloaded_variables.append(v)

        # NOTE(review): nesting inferred from a whitespace-mangled source. The
        # file names below do not contain the experiment, so with several
        # experiments later ones overwrite earlier ones - confirm intent.
        loc_folder = o.location.lower() if o.location else f'{o.lat}N-{o.lon}E'
        folder = os.path.join(o.output, loc_folder, "extremes")
        os.makedirs(folder, exist_ok=True)

        dataset = {}
        # homogenize units: common yearly time axis (31 Dec, 1979-2100)
        dates = np.array([
            cftime.DatetimeGregorian(y, 12, 31) for y in range(1979, 2100 + 1)
        ])
        index = pd.Index(cftime.date2num(dates, time_units), name=time_units)

        for v in downloaded_variables:
            series = v.load_timeseries(lon=o.lon,
                                       lat=o.lat,
                                       overwrite=o.overwrite)
            series.index = index[:len(
                series)]  # otherwise we have things like 180, 182 etc
            dataset[v.model] = series
            csv_file = os.path.join(folder, f'{o.indicator}-{v.model}.csv')
            print("Save to file", csv_file)
            series.to_csv(csv_file)

        df = pd.DataFrame(dataset)
        csv_file = os.path.join(folder, f'{o.indicator}-all.csv')
        print("Save to file", csv_file)
        df.to_csv(csv_file)
"2019-10-10 01:02:34", dt.datetime(2019, 10, 10, 1, 2, 34), id="str with space", ), pytest.param( "2019-10-10T01:02:34", dt.datetime(2019, 10, 10, 1, 2, 34), id="iso8601", ), pytest.param( np.datetime64("2019-10-10T11:22:33"), dt.datetime(2019, 10, 10, 11, 22, 33), id="datetime64", ), pytest.param( cftime.DatetimeGregorian(2019, 10, 10, 11, 22, 33), dt.datetime(2019, 10, 10, 11, 22, 33), id="cftime.DatetimeGregorian", ), ], ) def test__to_datetime(given, expect): assert util.to_datetime(given) == expect class Test_to_datetime(unittest.TestCase): def test_datetime(self): now = dt.datetime.now() result = util.to_datetime(now) self.assertEqual(result, now)
def test_ord_to_datetime():
    """Round-trip a cftime date through ``date2num`` and ``ord_to_datetime``."""
    # Independence day, at noon. The microsecond field is deliberately zero:
    # the original author notes the comparison fails when it is non-zero.
    independence_day = cftime.DatetimeGregorian(1776, 7, 4, 12, 0, 0, 0)
    as_ordinal = date2num(independence_day, TIMEUNITS)
    round_tripped = ord_to_datetime(as_ordinal, TIMEUNITS)
    assert round_tripped == independence_day
# NOTE(review): this chunk starts mid-script. The statements down to the merge
# presumably run once per variable inside a loop whose header is not visible
# here -- confirm the indentation against the full script.
# get data array for variable:
_da = (
    db_aggregated.filter(variable=var, climatemodel=climatemodels_fl)
    .timeseries()
    .transpose()
    .unstack()
    .to_xarray()
    .squeeze()
)
# convert to dataset:
_ds = _da.to_dataset(name=var)
# remove coordinate for variable (contained in name):
# NOTE(review): `variable` (not `var`) is used as the coordinate key here;
# presumably it holds the coordinate's name -- confirm.
del _ds.coords[variable]
# merge with existing dataset:
ds = xr.merge([_ds, ds])

# Split the time coordinate into calendar components so the axis can be
# rebuilt with a different datetime type below.
ds['year'] = xr.DataArray([t.year for t in ds['time'].values], dims='time')
ds['month'] = xr.DataArray([t.month for t in ds['time'].values], dims='time')
ds['day'] = xr.DataArray([t.day for t in ds['time'].values], dims='time')

# Convert to cftime
dates = [
    cftime.DatetimeGregorian(y, m, d)
    for y, m, d in zip(ds['year'], ds['month'], ds['day'])
]
ds['time'] = dates
ds = ds.sel(time=slice('1850', '2100'))

# Rebuild the (now sliced) axis as pandas timestamps. `pd.datetime` was
# deprecated in pandas 1.0 and removed in 2.0, so build `pd.Timestamp`
# objects directly; int() unwraps the 0-d items yielded by iterating the
# year/month/day arrays.
ds['time'] = pd.to_datetime([
    pd.Timestamp(year=int(y), month=int(m), day=int(d))
    for y, m, d in zip(ds['year'], ds['month'], ds['day'])
])

# Timestep for integral:
ds['delta_t'] = xr.DataArray(np.ones(len(ds['time'])),
                             dims='time',
                             coords={'time': ds['time']})
ds_save = ds.copy()
# %%
ds
with forest.drivers.eida50.Database(path):
    pass
with forest.drivers.eida50.Database(path):
    pass


@pytest.mark.parametrize(
    "value",
    [
        None,
        "2020-01-01 00:00:00",
        np.datetime64("2020-01-01 00:00:00", "s"),
    ],
)
def test_database_insert_times_invalid_types(value):
    """Inserting a value without a usable ``strftime(fmt)`` must raise."""
    db = forest.drivers.eida50.Database()
    with pytest.raises(Exception):
        db.insert_times([value], "file.nc")


@pytest.mark.parametrize(
    "value,expect",
    [
        pytest.param(
            dt.datetime(2020, 1, 1),
            dt.datetime(2020, 1, 1),
        ),
        pytest.param(
            cftime.DatetimeGregorian(2020, 1, 1),
            dt.datetime(2020, 1, 1),
        ),
    ],
)
def test_database_insert_times_supported_types(value, expect):
    """A stdlib or cftime datetime is stored and fetched back as ``expect``."""
    db = forest.drivers.eida50.Database()
    db.insert_times([value], "file.nc")
    stored = db.fetch_times()
    assert stored == [expect]
# 3x3x3x3 block of distinct integers, so each index maps to a unique value.
values = np.arange(3 * 3 * 3 * 3).reshape(3, 3, 3, 3)
with netCDF4.Dataset(path, "w") as dataset:
    variable_4d(dataset, variable, times, pressures, longitudes, latitudes, values)
# NOTE(review): the statements below are assumed to sit outside the `with`
# block (file closed before it is read back) -- confirm against the full file.
lon, lat = 0.1, 0.1
loader = series.SeriesLoader([path])
result = loader._load_netcdf4(path, variable, lon, lat, pressure=500)
expect = {"x": times, "y": values[:, 1, 0, 0]}
npt.assert_array_equal(expect["x"], result["x"])
npt.assert_array_equal(expect["y"], result["y"])


@pytest.mark.parametrize(
    "value,expect",
    [(dt.datetime(2020, 1, 1), "2020-01-01 00:00:00"),
     (cftime.DatetimeGregorian(2020, 1, 1), "2020-01-01 00:00:00")])
def test_series_locator_key(value, expect):
    """``SeriesLocator.key`` gives the same string for datetime and cftime input."""
    assert series.SeriesLocator.key(value) == expect


class TestSeries(unittest.TestCase):
    """Series tests that share the scratch file path ``test-series.nc``."""

    def setUp(self):
        # Path used by the tests in this class; removed again in tearDown.
        self.path = "test-series.nc"

    def tearDown(self):
        # Remove the file only if a test actually created it.
        if os.path.exists(self.path):
            os.remove(self.path)

    def test_series_given_missing_variable_returns_empty(self):
        pressure = 500
        lon = 1
right = pd.date_range("2020-01-01", periods=3)
assert time_array_equal(left, right) == False


def test_valueerror_lengths_must_match():
    """Comparing datetime indexes of length 1 and 3 with ``==`` raises ValueError."""
    a = ["2020-01-01T00:00:00Z"]
    b = [
        "2020-02-01T00:00:00Z", "2020-02-02T00:00:00Z", "2020-02-03T00:00:00Z"
    ]
    with pytest.raises(ValueError):
        # The comparison result is discarded; only the raised error matters.
        pd.to_datetime(a) == pd.to_datetime(b)


@pytest.mark.parametrize("left,right,expect", [
    pytest.param([
        cftime.DatetimeGregorian(2020, 1, 1),
        cftime.DatetimeGregorian(2020, 1, 2),
        cftime.DatetimeGregorian(2020, 1, 3)
    ],
                 pd.date_range("2020-01-01", periods=3),
                 True,
                 id="gregorian/pandas same values"),
    pytest.param([
        cftime.DatetimeGregorian(2020, 2, 1),
        cftime.DatetimeGregorian(2020, 2, 2),
        cftime.DatetimeGregorian(2020, 2, 3)
    ],
                 pd.date_range("2020-01-01", periods=3),
                 False,
                 id="gregorian/pandas same length different values"),
])
def _first_of_each_run(indices):
    """Return the first element of every run of consecutive integers.

    ``indices`` must be a non-empty, ascending 1-D integer array. A new run
    starts wherever the step from the previous element is larger than 1.
    """
    starts_run = np.concatenate(([True], np.diff(indices) > 1))
    return indices[starts_run]


def main(f, years):
    """Write a per-landfall summary CSV for the storms listed next to ``f``.

    The storm indices are read from the ``findex`` column of
    ``summary_1970-2019.csv`` in the same directory as ``f``. For every storm
    whose first valid time falls in the requested years, each separate
    landfall (a run of consecutive track points closer than 111 km to land)
    that begins west of 60W contributes one row. The result is written to
    ``NA_landfall_summary_1970-2019.csv`` in that same directory.

    Parameters
    ----------
    f : str
        Path of the NetCDF track file, opened with ``mask_and_scale=False``.
    years : sequence
        ``(first_year, last_year)``, both inclusive.

    Raises
    ------
    ValueError
        If a storm has no time value later than 1800-01-01 (``min`` of an
        empty sequence).
    """
    sDir = os.path.dirname(f)
    yrs = np.arange(years[0], years[1] + 1, 1)
    sf = pd.read_csv(os.path.join(sDir, 'summary_1970-2019.csv'))
    hindex = list(sf['findex'])

    # Times at or before this date are ignored when looking for the start of
    # a storm (presumably they are fill values -- confirm).
    earliest_valid = cftime.DatetimeGregorian(1800, 1, 1, 0, 0, 0, 0)

    storm_summary = dict(name=[],
                         year=[],
                         t0=[],
                         tf=[],
                         max_usa_sshs=[],
                         landfall_lat=[],
                         landfall_lon=[],
                         dist_from_shore_km=[],
                         landfall_cat=[],
                         landfall_wspd_kts=[],
                         landfall_pres=[],
                         findex=[])

    # Context manager so the dataset is closed once every storm is processed.
    with xr.open_dataset(f, mask_and_scale=False) as ncfile:
        for hi in hindex:
            ncf = ncfile.sel(storm=hi)
            times = ncf['time'].values
            t0 = min(t for t in times if t > earliest_valid)
            if t0.year not in yrs:
                continue

            lf = ncf.landfall.values.astype('float')
            lf[lf == -9999] = np.nan  # convert fill values to nan
            # distance from land is < 60 nmile (111 km)
            lf_ind = np.where(lf < 111)[0]

            # find the storm category
            cats = return_clean_array(ncf, 'usa_sshs')
            max_cat = np.nanmax(cats)

            # only storms that make landfall and are a TS or higher;
            # `not max_cat >= 0` also rejects a NaN maximum
            if len(lf_ind) == 0 or not max_cat >= 0:
                continue

            # index of the beginning of each individual landfall
            # (not just where landfall=0)
            landfall_idx = _first_of_each_run(lf_ind)

            lats = return_clean_array(ncf, 'lat')
            lons = return_clean_array(ncf, 'lon')
            wspd = return_clean_array(ncf, 'usa_wind')
            pres = return_clean_array(ncf, 'usa_pres')

            # Per-storm values, computed lazily on the first qualifying
            # landfall so storms without one do no extra work.
            storm_name = None
            tf = None

            # record category, wind speed and pressure at each landfall
            for idx in landfall_idx:
                lf_lon = lons[idx]
                if not lf_lon < -60:
                    continue
                if storm_name is None:
                    # tobytes() replaces the deprecated ndarray.tostring()
                    storm_name = ncf['name'].values.tobytes().decode('utf-8')
                    tf = max(times)
                storm_summary['name'].append(storm_name)
                storm_summary['t0'].append(t0)
                storm_summary['tf'].append(tf)
                storm_summary['year'].append(t0.year)
                storm_summary['findex'].append(hi)
                storm_summary['dist_from_shore_km'].append(lf[idx])
                storm_summary['max_usa_sshs'].append(max_cat)
                storm_summary['landfall_lat'].append(lats[idx])
                storm_summary['landfall_lon'].append(lf_lon)
                storm_summary['landfall_cat'].append(cats[idx])
                storm_summary['landfall_wspd_kts'].append(wspd[idx])
                storm_summary['landfall_pres'].append(pres[idx])

    df = pd.DataFrame(storm_summary)
    df.to_csv(os.path.join(sDir, 'NA_landfall_summary_1970-2019.csv'),
              index=False)