def test_collect_missing_ens_cycle( file_glob, ans_file, n_cores ):
    # Collect a dataset from files with missing ensemble/cycle members and
    # compare it against a stored answer file, then check that chunked
    # (hierarchical) collection gives the same result.
    # NOTE(review): indentation reconstructed from a collapsed line — the
    # nesting of the 'crs' statements under the if is inferred; confirm.
    miss_ens_cycle_path = test_dir.joinpath(miss_ens_cycle_dir)
    files = sorted(miss_ens_cycle_path.glob(file_glob))
    ens_cycle_ds = open_whp_dataset(files, n_cores=n_cores)
    # There is a bit of tricky encoding to deal with NaN in strings in netcdf
    # and type conversions
    if 'crs' in ens_cycle_ds.variables:
        ens_cycle_ds['crs'] = ens_cycle_ds['crs'].astype('S8')
        ens_cycle_ds['crs'].encoding['_FillValue'] = 'nan'
    # This is mostly because int32 is changed to float64 bc of nans
    for vv in ens_cycle_ds.variables:
        if 'time' not in vv:
            ens_cycle_ds[vv].encoding['dtype'] = ens_cycle_ds[vv].dtype
    ans = xr.open_dataset(answer_dir / ans_file)
    xr.testing.assert_equal(ens_cycle_ds, ans)
    # Chunked collection must match the unchunked collection.
    ens_cycle_ds_chunk = open_whp_dataset(files, n_cores=n_cores, file_chunk_size=1)
    if 'crs' in ens_cycle_ds.variables:
        # Apply the same string-dtype normalization before comparing.
        ens_cycle_ds_chunk['crs'] = ens_cycle_ds_chunk['crs'].astype('S8')
    xr.testing.assert_equal(ens_cycle_ds_chunk, ens_cycle_ds)
def test_collect_missing_ens_cycle(file_glob, expected, n_cores):
    """Collect files with missing ensemble/cycle members; verify metadata.

    Comparing the dataset repr against the expected string checks
    everything about the metadata. A second, chunked collection must
    equal the first.
    """
    source_path = test_dir.joinpath(miss_ens_cycle_dir)
    source_files = sorted(source_path.glob(file_glob))
    collected = open_whp_dataset(source_files, n_cores=n_cores)
    assert repr(collected) == expected
    collected_chunked = open_whp_dataset(
        source_files, n_cores=n_cores, file_chunk_size=1)
    assert collected_chunked.equals(collected)
def test_collect_ensemble_cycle_isel(file_glob, expected, n_cores, isel):
    """Collect an ensemble-cycle dataset with an isel subset; verify metadata.

    The repr comparison checks everything about the metadata; a chunked
    collection with the same subset must equal the unchunked one.
    """
    source_files = sorted(
        test_dir.joinpath('data/collection_data/ens_ana').glob(file_glob))
    collected = open_whp_dataset(source_files, n_cores=n_cores, isel=isel)
    assert repr(collected) == expected
    collected_chunked = open_whp_dataset(
        source_files, n_cores=n_cores, isel=isel, file_chunk_size=1)
    assert collected_chunked.equals(collected)
def test_collect_ensemble_cycle_isel(file_glob, ans_file, n_cores, isel):
    """Collect an ensemble-cycle dataset with an isel subset vs answer file.

    Both the plain and the chunked (file_chunk_size=2) collections must
    equal the stored answer dataset.
    """
    source_files = sorted(
        test_dir.joinpath('data/collection_data/ens_ana').glob(file_glob))
    collected = open_whp_dataset(source_files, n_cores=n_cores, isel=isel)
    answer = xr.open_dataset(answer_dir / ans_file)
    xr.testing.assert_equal(collected, answer)
    collected_chunked = open_whp_dataset(
        source_files, n_cores=n_cores, isel=isel, file_chunk_size=2)
    xr.testing.assert_equal(collected_chunked, answer)
def test_collect_profile_chunking(file_glob, expected, n_cores):
    """Collect a simulation with profiling and dask chunks; verify metadata.

    The repr comparison checks everything about the metadata.
    NOTE: if file_chunk_size > 0 and chunks is not None there is an error.
    """
    source_files = sorted(test_dir.joinpath(sim_dir).glob(file_glob))
    collected = open_whp_dataset(
        source_files, n_cores=n_cores, profile=True, chunks=15)
    assert repr(collected) == expected
    collected_chunked = open_whp_dataset(
        source_files, n_cores=n_cores, profile=True, chunks=15,
        file_chunk_size=2)
    assert collected_chunked.equals(collected)
def test_collect_profile_chunking(file_glob, ans_file, n_cores):
    """Collect a simulation with profiling and dask chunks vs answer file.

    Both the plain and the chunked (file_chunk_size=1) collections must
    equal the stored answer dataset.
    NOTE: if file_chunk_size > 0 and chunks is not None there is an error.
    """
    source_files = sorted(test_dir.joinpath(sim_dir).glob(file_glob))
    collected = open_whp_dataset(
        source_files, n_cores=n_cores, profile=True, chunks=15)
    answer = xr.open_dataset(answer_dir / ans_file)
    xr.testing.assert_equal(collected, answer)
    collected_chunked = open_whp_dataset(
        source_files, n_cores=n_cores, profile=True, chunks=15,
        file_chunk_size=1)
    xr.testing.assert_equal(collected_chunked, answer)
def test_collect_ensemble_cycle(file_glob, expected, n_cores):
    """Collect an ensemble-cycle dataset and verify its metadata.

    The repr comparison checks everything about the metadata. Hierarchical
    (chunked) collection must then be identical to the plain collection.
    """
    source_files = sorted(
        test_dir.joinpath('data/collection_data/ens_ana').glob(file_glob))
    collected = open_whp_dataset(source_files, n_cores=n_cores)
    assert repr(collected) == expected
    # Speed up this super slow glob by using a much larger chunk size.
    chunk_size = 50 if file_glob == '*/*/*LDASOUT_DOMAIN1' else 1
    collected_chunked = open_whp_dataset(
        source_files, n_cores=n_cores, file_chunk_size=chunk_size)
    assert collected_chunked.equals(collected)
def test_collect_cycle(file_glob, expected, n_cores):
    """Collect a cycle dataset and verify its metadata.

    Comparing the dataset repr against the expected string checks
    everything about the metadata.
    """
    # Fix: removed an unused, function-level `from pprint import pprint`
    # left over from debugging.
    cycle_path = test_dir.joinpath(cycle_dir)
    files = sorted(cycle_path.glob(file_glob))
    cycle_ds = open_whp_dataset(files, n_cores=n_cores)
    # This checks everything about the metadata.
    assert repr(cycle_ds) == expected
def test_collect_ensemble_cycle(file_glob, ans_file, n_cores):
    """Collect an ensemble-cycle dataset and compare to a stored answer.

    Also verifies that hierarchical (chunked) collection is identical to
    the plain collection.
    """
    source_files = sorted(
        test_dir.joinpath('data/collection_data/ens_ana').glob(file_glob))
    collected = open_whp_dataset(source_files, n_cores=n_cores)
    answer = xr.open_dataset(answer_dir / ans_file)
    xr.testing.assert_equal(collected, answer)
    # Speed up this super slow glob by using a much larger chunk size.
    chunk_size = 50 if file_glob == '*/*/*LDASOUT_DOMAIN1' else 1
    collected_chunked = open_whp_dataset(
        source_files, n_cores=n_cores, file_chunk_size=chunk_size)
    xr.testing.assert_equal(collected_chunked, collected)
def test_init(mod_dir, mod_glob):
    """Evaluation can be constructed from modeled/observed dataframes.

    Uses the modeled frame as its own observation — only construction is
    under test here, not any statistics.
    """
    files = sorted(mod_dir.glob(mod_glob))
    mod = open_whp_dataset(files)
    mod_df = mod.streamflow.to_dataframe()
    obs_df = mod_df
    streamflow_eval = Evaluation(mod_df, obs_df)
    # Fix: `type(x) == T` replaced with the idiomatic isinstance check.
    assert isinstance(streamflow_eval, Evaluation)
def test_collect_simulation( file_glob, ans_file, n_cores ):
    """Collect a simulation dataset and compare it to a stored answer file."""
    source_files = sorted(test_dir.joinpath(sim_dir).glob(file_glob))
    collected = open_whp_dataset(source_files, n_cores=n_cores)
    answer = xr.open_dataset(answer_dir / ans_file)
    xr.testing.assert_equal(collected, answer)
def test_collect_ensemble( file_glob, ans_file, n_cores ):
    """Collect an ensemble dataset and compare it to a stored answer file."""
    source_files = sorted(test_dir.joinpath(ens_dir).glob(file_glob))
    collected = open_whp_dataset(source_files, n_cores=n_cores)
    answer = xr.open_dataset(answer_dir / ans_file)
    xr.testing.assert_equal(collected, answer)
def test_gof_perfect(engine, mod_dir, mod_glob, indices_dict, join_on, variable, group_by_in, transform, transform_key, expected_key):
    """Goodness-of-fit on identical modeled/observed data matches answers.

    Runs the evaluation through either the pandas ('pd') or xarray ('xr')
    engine, optionally grouping over space, applies `transform` to the
    modeled values, and compares the gof frame to the stored answer repr.
    """
    # Keep this variable agnostic
    files = sorted(mod_dir.glob(mod_glob))
    mod = open_whp_dataset(files).isel(indices_dict)
    if group_by_in is None:
        group_by_key = ''
        group_by = None
    elif group_by_in == 'space':
        group_by_key = '-' + group_by_in
        # Group over all join dimensions except time.
        group_by = copy.deepcopy(join_on)
        group_by.remove('time')
    else:
        # Fix: previously raised with `group_by`, which is unassigned on
        # this path and produced a NameError instead of the ValueError.
        raise ValueError("not a valid grouping for this test: ", group_by_in)
    expected_answer_key = expected_key + group_by_key + '_' + transform_key
    expected = str_to_frame(gof_answer_reprs[expected_answer_key])
    if engine == 'pd':
        mod_df = mod[variable].to_dataframe().rename(
            columns={variable: 'modeled'})
        obs_df = mod[variable].to_dataframe().rename(
            columns={variable: 'observed'})
        mod_df.modeled = transform(mod_df.modeled)
        the_eval = Evaluation(mod_df, obs_df, join_on=join_on)
        gof = the_eval.gof(group_by=group_by)
        assert_frame_close(round_trip_df_serial(gof), expected)
    elif engine == 'xr':
        if group_by_in is not None:
            pytest.skip("Currently not grouping using xarray.")
        mod_ds = mod.rename({variable: 'modeled'})['modeled']
        obs_ds = mod.rename({variable: 'observed'})['observed']
        # Apply the transform via pandas, then write back in-place so the
        # DataArray keeps its dims/coords.
        new_data = np.array(transform(mod_ds.to_dataframe().modeled)).reshape(
            mod_ds.shape)
        mod_ds.values = new_data
        the_eval = Evaluation(mod_ds, obs_ds, join_on=join_on)
        gof = the_eval.gof(group_by=group_by).to_dataframe()
        assert_frame_close(round_trip_df_serial(gof), expected)
def test_collect_simulation(file_glob, expected, n_cores):
    """Collect a simulation dataset and verify its metadata.

    Comparing the dataset repr against the expected string checks
    everything about the metadata.
    """
    source_files = sorted(test_dir.joinpath(sim_dir).glob(file_glob))
    collected = open_whp_dataset(source_files, n_cores=n_cores)
    assert repr(collected) == expected
def test_collect_ensemble(file_glob, expected, n_cores):
    """Collect an ensemble dataset and verify its metadata.

    Comparing the dataset repr against the expected string checks
    everything about the metadata.
    """
    source_files = sorted(test_dir.joinpath(ens_dir).glob(file_glob))
    collected = open_whp_dataset(source_files, n_cores=n_cores)
    assert repr(collected) == expected