def apply_qdm(
    simulation,
    qdm,
    years,
    variable,
    out,
    selslice=None,
    iselslice=None,
    out_zarr_region=None,
    root_attrs_json_file=None,
    new_attrs=None,
):
    """Adjust simulation years with QDM bias correction method, outputting Zarr Store"""
    first_year, last_year = (int(x) for x in years.split(","))

    # Unpack "key=value" strings into a dict of new attrs.
    unpacked_attrs = None
    if new_attrs:
        unpacked_attrs = {k: v for x in new_attrs for k, v in (x.split("="),)}

    # Parse "dim=start,stop" strings into label-based (string) slices.
    sel_slices_d = None
    if selslice:
        sel_slices_d = {}
        for s in selslice:
            k, v = s.split("=")
            sel_slices_d[k] = slice(*map(str, v.split(",")))

    # Parse "dim=start,stop" strings into integer index slices.
    isel_slices_d = None
    if iselslice:
        isel_slices_d = {}
        for s in iselslice:
            k, v = s.split("=")
            isel_slices_d[k] = slice(*map(int, v.split(",")))

    # Parse "dim=start,stop" strings into the Zarr Store region to write to.
    out_zarr_region_d = None
    if out_zarr_region:
        out_zarr_region_d = {}
        for s in out_zarr_region:
            k, v = s.split("=")
            out_zarr_region_d[k] = slice(*map(int, v.split(",")))

    services.apply_qdm(
        simulation=simulation,
        qdm=qdm,
        # +1 because years is an inclusive range.
        years=range(first_year, last_year + 1),
        variable=variable,
        out=out,
        sel_slice=sel_slices_d,
        isel_slice=isel_slices_d,
        out_zarr_region=out_zarr_region_d,
        root_attrs_json_file=root_attrs_json_file,
        new_attrs=unpacked_attrs,
    )
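
# Illustrative example (hypothetical values, not part of the CLI itself): the
# slice-style arguments above are plain "dim=start,stop" strings, so calling
# the wrapper with
#     selslice=["time=1995,2000"], iselslice=["lat=0,10"], out_zarr_region=["lat=0,10"]
# parses to
#     sel_slice={"time": slice("1995", "2000")}
#     isel_slice={"lat": slice(0, 10)}
#     out_zarr_region={"lat": slice(0, 10)}
# before being forwarded to services.apply_qdm.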
def test_apply_qdm():
    """Test to apply a trained QDM to input data and read the output.

    This is an integration test between train_qdm and apply_qdm.
    """
    # Setup input data.
    target_variable = "fakevariable"
    variable_kind = "additive"
    n_histdays = 10 * 365  # 10 years of daily historical.
    n_simdays = 50 * 365  # 50 years of daily simulation.

    model_bias = 2
    ts_ref = np.ones(n_histdays, dtype=np.float64)
    ts_sim = np.ones(n_simdays, dtype=np.float64)
    hist = _datafactory(ts_ref + model_bias, variable_name=target_variable)
    ref = _datafactory(ts_ref, variable_name=target_variable)
    sim = _datafactory(ts_sim + model_bias, variable_name=target_variable)

    # Load up a fake repo with our input data in the place of big data and cloud
    # storage.
    qdm_key = "memory://test_apply_qdm/qdm.zarr"
    hist_key = "memory://test_apply_qdm/hist.zarr"
    ref_key = "memory://test_apply_qdm/ref.zarr"
    sim_key = "memory://test_apply_qdm/sim.zarr"
    sim_adj_key = "memory://test_apply_qdm/sim_adjusted.zarr"
    repository.write(sim_key, sim)
    repository.write(hist_key, hist)
    repository.write(ref_key, ref)

    target_years = [1994, 1995]

    train_qdm(
        historical=hist_key,
        reference=ref_key,
        out=qdm_key,
        variable=target_variable,
        kind=variable_kind,
    )
    apply_qdm(
        simulation=sim_key,
        qdm=qdm_key,
        years=target_years,
        variable=target_variable,
        out=sim_adj_key,
    )
    adjusted_ds = repository.read(sim_adj_key)
    assert target_variable in adjusted_ds.variables
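
# A minimal follow-up sketch (hypothetical, not one of the repository's tests;
# assumes the adjusted output keeps a "time" coordinate that supports xarray's
# .dt accessor and contains only the requested years):
def _sketch_check_adjusted_years(adjusted_ds, target_years):
    # Years present in the adjusted output should be limited to the target years.
    years_present = {int(y) for y in adjusted_ds["time"].dt.year.values}
    assert years_present.issubset(set(target_years))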
def test_prime_qdm_output_zarrstore():
    """
    Test that prime_qdm_output_zarrstore creates a Zarr Store with the expected
    variables, shapes, and attrs.

    We test this by running QDM (train + apply) in its usual mode and then
    using comparable parameters to prime a Zarr Store. We then compare the two.
    """
    # Setup input data.
    quantile_variable = "sim_q"
    target_variable = "fakevariable"
    variable_kind = "additive"
    n_histdays = 10 * 365  # 10 years of daily historical.
    n_simdays = 50 * 365  # 50 years of daily simulation.

    model_bias = 2
    ts_ref = np.ones(n_histdays, dtype=np.float64)
    ts_sim = np.ones(n_simdays, dtype=np.float64)
    hist = _datafactory(ts_ref + model_bias, variable_name=target_variable)
    ref = _datafactory(ts_ref, variable_name=target_variable)
    sim = _datafactory(ts_sim + model_bias, variable_name=target_variable)

    # Load up a fake repo with our input data in the place of big data and cloud
    # storage.
    qdm_key = "memory://test_prime_qdm_output_zarrstore/qdm.zarr"
    hist_key = "memory://test_prime_qdm_output_zarrstore/hist.zarr"
    ref_key = "memory://test_prime_qdm_output_zarrstore/ref.zarr"
    sim_key = "memory://test_prime_qdm_output_zarrstore/sim.zarr"
    sim_adj_key = "memory://test_prime_qdm_output_zarrstore/sim_adjusted.zarr"
    primed_url = "memory://test_prime_qdm_output_zarrstore/primed.zarr"
    repository.write(sim_key, sim)
    repository.write(hist_key, hist)
    repository.write(ref_key, ref)

    target_year = 1995

    # Let's prime a QDM output.
    prime_qdm_output_zarrstore(
        simulation=sim_key,
        variable=target_variable,
        years=[target_year],
        out=primed_url,
        zarr_region_dims=["lat"],
    )
    primed_ds = repository.read(primed_url)

    # Now train and apply the actual QDM and compare its output with the primed Zarr Store.
    train_qdm(
        historical=hist_key,
        reference=ref_key,
        out=qdm_key,
        variable=target_variable,
        kind=variable_kind,
    )
    apply_qdm(
        simulation=sim_key,
        qdm=qdm_key,
        years=[target_year],
        variable=target_variable,
        out=sim_adj_key,
    )
    adjusted_ds = repository.read(sim_adj_key)

    # Desired variables present?
    assert (
        quantile_variable in primed_ds.variables
        and target_variable in primed_ds.variables
    )
    # Desired shapes with dims in correct order?
    assert primed_ds[quantile_variable].shape == primed_ds[target_variable].shape
    assert primed_ds[target_variable].shape == adjusted_ds[target_variable].shape
    # Output attrs matching for root and variables?
    assert primed_ds.attrs == adjusted_ds.attrs
    assert primed_ds[target_variable].attrs == adjusted_ds[target_variable].attrs
    assert primed_ds[quantile_variable].attrs == adjusted_ds[quantile_variable].attrs
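
# Sketch of an extra consistency check (hypothetical helper, not part of the
# suite; assumes priming writes coordinate values rather than only empty array
# shells), comparing coordinates between a primed store and real QDM output:
def _sketch_compare_primed_coords(primed_ds, adjusted_ds):
    for coord in ("time", "lon", "lat"):
        np.testing.assert_array_equal(
            primed_ds[coord].values, adjusted_ds[coord].values
        )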
def test_prime_qdm_regional_apply():
    """
    Integration test checking that prime_qdm_output_zarrstore and apply_qdm can
    write regionally.

    The strategy is to create input data for two latitudes and run QDM
    (train + apply) two ways. One is a "vanilla", global QDM. The other uses a
    "regional" strategy: training and applying QDM on each latitude separately
    and writing each result into the same, primed Zarr Store. We then compare
    output from the vanilla and regional approaches.
    """
    # Setup input data.
    target_variable = "fakevariable"
    variable_kind = "additive"
    n_histdays = 10 * 365  # 10 years of daily historical.
    n_simdays = 50 * 365  # 50 years of daily simulation.

    model_bias = 2
    ts_ref = np.ones(n_histdays, dtype=np.float64)
    ts_sim = np.ones(n_simdays, dtype=np.float64)
    hist = _datafactory(ts_ref + model_bias, variable_name=target_variable)
    ref = _datafactory(ts_ref, variable_name=target_variable)
    sim = _datafactory(ts_sim + model_bias, variable_name=target_variable)

    # Append a copy of the data at a new latitude of 2.0 rather than modifying
    # the data factories. This gives us a way to test regional writes.
    sim = xr.concat([sim, sim.assign({"lat": np.array([2.0])})], dim="lat")
    # Introduce a slight difference at the new latitude.
    sim[target_variable][:, :, -1] += 1
    ref = xr.concat([ref, ref.assign({"lat": np.array([2.0])})], dim="lat")
    hist = xr.concat([hist, hist.assign({"lat": np.array([2.0])})], dim="lat")

    # The data factory appends a cruft "index" coordinate. We remove it because
    # we don't need it and this avoids tinkering with the input data fixtures.
    hist = hist.drop_vars("index")
    ref = ref.drop_vars("index")
    sim = sim.drop_vars("index")

    # Load up a fake repo with our input data in the place of big data and cloud
    # storage.
    qdm_key = "memory://test_apply_qdm/qdm_global.zarr"
    qdm_region1_key = "memory://test_apply_qdm/qdm_region1.zarr"
    qdm_region2_key = "memory://test_apply_qdm/qdm_region2.zarr"
    hist_key = "memory://test_apply_qdm/hist.zarr"
    ref_key = "memory://test_apply_qdm/ref.zarr"
    sim_key = "memory://test_apply_qdm/sim.zarr"
    primed_url = "memory://test_prime_qdm_output_zarrstore/primed.zarr"
    sim_adj_key = "memory://test_apply_qdm/sim_adjusted.zarr"
    repository.write(sim_key, sim)
    repository.write(hist_key, hist)
    repository.write(ref_key, ref)

    target_years = [1994, 1995]

    # Let's prime a QDM output.
    prime_qdm_output_zarrstore(
        simulation=sim_key,
        variable=target_variable,
        years=target_years,
        out=primed_url,
        zarr_region_dims=["lat"],
    )

    # Now train and apply QDM for two cases: one with regional writes to the
    # primed Zarr Store and one without.

    # Writing to regions.
    region_1 = {"lat": slice(0, 1)}
    train_qdm(
        historical=hist_key,
        reference=ref_key,
        out=qdm_region1_key,
        variable=target_variable,
        kind=variable_kind,
        isel_slice=region_1,
    )
    apply_qdm(
        simulation=sim_key,
        qdm=qdm_region1_key,
        years=target_years,
        variable=target_variable,
        out=primed_url,
        isel_slice=region_1,
        out_zarr_region=region_1,
    )

    region_2 = {"lat": slice(1, 2)}
    train_qdm(
        historical=hist_key,
        reference=ref_key,
        out=qdm_region2_key,
        variable=target_variable,
        kind=variable_kind,
        isel_slice=region_2,
    )
    apply_qdm(
        simulation=sim_key,
        qdm=qdm_region2_key,
        years=target_years,
        variable=target_variable,
        out=primed_url,
        isel_slice=region_2,
        out_zarr_region=region_2,
    )
    primed_adjusted_ds = repository.read(primed_url)

    # Doing it globally, all "regions" at once.
    train_qdm(
        historical=hist_key,
        reference=ref_key,
        out=qdm_key,
        variable=target_variable,
        kind=variable_kind,
    )
    apply_qdm(
        simulation=sim_key,
        qdm=qdm_key,
        years=target_years,
        variable=target_variable,
        out=sim_adj_key,
    )
    adjusted_ds = repository.read(sim_adj_key)

    # Regional and global adjustments should match.
    xr.testing.assert_allclose(primed_adjusted_ds, adjusted_ds)
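
# Generalized sketch (hypothetical helper, not part of the test suite): the
# regional pattern above extends to any number of latitude bands by looping
# over index slices and writing each adjustment into the matching region of a
# primed Zarr Store. The "memory://..." URL below is made up for illustration.
def _sketch_regional_qdm(sim_key, hist_key, ref_key, primed_url, variable, kind, years, n_lat):
    for i in range(n_lat):
        region = {"lat": slice(i, i + 1)}
        qdm_key = f"memory://sketch_regional/qdm_lat{i}.zarr"
        train_qdm(
            historical=hist_key,
            reference=ref_key,
            out=qdm_key,
            variable=variable,
            kind=kind,
            isel_slice=region,
        )
        apply_qdm(
            simulation=sim_key,
            qdm=qdm_key,
            years=years,
            variable=variable,
            out=primed_url,
            isel_slice=region,
            out_zarr_region=region,
        )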
def test_qplad_integration(kind):
    """Integration test of the QDM and QPLAD services"""
    lon = [-99.83, -99.32, -99.79, -99.23]
    lat = [42.25, 42.21, 42.63, 42.59]
    time = xr.cftime_range(start="1994-12-17", end="2015-01-15", calendar="noleap")
    data_ref = 15 + 8 * np.random.randn(len(time), 4, 4)
    data_train = 15 + 8 * np.random.randn(len(time), 4, 4)
    variable = "scen"

    ref_fine = xr.Dataset(
        data_vars=dict(
            scen=(["time", "lat", "lon"], data_ref),
        ),
        coords=dict(
            time=time,
            lon=(["lon"], lon),
            lat=(["lat"], lat),
        ),
        attrs=dict(description="Weather related data."),
    )
    ds_train = xr.Dataset(
        data_vars=dict(
            scen=(["time", "lat", "lon"], data_train),
        ),
        coords=dict(
            time=time,
            lon=(["lon"], lon),
            lat=(["lat"], lat),
        ),
        attrs=dict(description="Weather related data."),
    )

    # take the mean across space to represent coarse reference data for AFs
    ds_ref_coarse = ref_fine.mean(["lat", "lon"])
    ds_train = ds_train.mean(["lat", "lon"])

    # tile the fine resolution grid with the coarse resolution ref data
    ref_coarse = ds_ref_coarse.broadcast_like(ref_fine)
    ds_bc = ds_train + 3

    # need to set variable units to pass xclim 0.29 check on units
    ds_train["scen"].attrs["units"] = "K"
    ds_bc["scen"].attrs["units"] = "K"
    ref_coarse["scen"].attrs["units"] = "K"
    ref_fine["scen"].attrs["units"] = "K"
    ds_ref_coarse["scen"].attrs["units"] = "K"

    # write test data
    ref_coarse_coarse_url = (
        "memory://test_qplad_downscaling/a/ref_coarse_coarse/path.zarr"
    )
    ref_coarse_url = "memory://test_qplad_downscaling/a/ref_coarse/path.zarr"
    ref_fine_url = "memory://test_qplad_downscaling/a/ref_fine/path.zarr"
    qdm_train_url = "memory://test_qplad_downscaling/a/qdm_train/path.zarr"
    sim_url = "memory://test_qplad_downscaling/a/sim/path.zarr"
    qdm_train_out_url = "memory://test_qplad_downscaling/a/qdm_train_out/path.zarr"
    biascorrected_url = "memory://test_qplad_downscaling/a/biascorrected/path.zarr"
    sim_biascorrected_key = (
        "memory://test_qplad_downscaling/a/biascorrected/sim_biascorrected.zarr"
    )

    repository.write(ref_coarse_coarse_url, ds_ref_coarse)
    repository.write(
        ref_coarse_url,
        ref_coarse.chunk({"time": -1, "lat": -1, "lon": -1}),
    )
    repository.write(
        ref_fine_url,
        ref_fine.chunk({"time": -1, "lat": -1, "lon": -1}),
    )
    repository.write(qdm_train_url, ds_train)
    repository.write(sim_url, ds_bc)

    # this is an integration test between QDM and QPLAD, so use QDM services
    # for bias correction
    target_year = 2005
    train_qdm(
        historical=qdm_train_url,
        reference=ref_coarse_coarse_url,
        out=qdm_train_out_url,
        variable=variable,
        kind=kind,
    )
    apply_qdm(
        simulation=sim_url,
        qdm=qdm_train_out_url,
        years=[target_year],
        variable=variable,
        out=sim_biascorrected_key,
    )
    biascorrected_coarse = repository.read(sim_biascorrected_key)

    # make bias corrected data on the fine resolution grid
    biascorrected_fine = biascorrected_coarse.broadcast_like(
        ref_fine.sel(
            time=slice("{}-01-01".format(target_year), "{}-12-31".format(target_year))
        )
    )
    repository.write(
        biascorrected_url,
        biascorrected_fine.chunk({"time": -1, "lat": -1, "lon": -1}),
    )

    # write test data
    qplad_afs_url = "memory://test_qplad_downscaling/a/qplad_afs/path.zarr"
    sim_downscaled_url = "memory://test_qplad_downscaling/a/qplad_afs/downscaled.zarr"

    # now train QPLAD model
    train_qplad(ref_coarse_url, ref_fine_url, qplad_afs_url, variable, kind)

    # downscale
    apply_qplad(biascorrected_url, qplad_afs_url, variable, sim_downscaled_url)

    # check output
    downscaled_ds = repository.read(sim_downscaled_url)

    # check that downscaled average equals bias corrected value
    bc_timestep = biascorrected_fine[variable].isel(time=100).values[0][0]
    qplad_downscaled_mean = downscaled_ds[variable].isel(time=100).mean().values
    np.testing.assert_almost_equal(bc_timestep, qplad_downscaled_mean)
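
# Additional sanity-check sketch (hypothetical, not part of the test): the
# downscaled output should sit on the fine grid, so its lat/lon sizes should
# match the fine reference coordinates defined at the top of the test.
def _sketch_check_downscaled_grid(downscaled_ds, variable, lat, lon):
    assert downscaled_ds[variable].sizes["lat"] == len(lat)
    assert downscaled_ds[variable].sizes["lon"] == len(lon)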