def test_validation_n3_k2_temporal_matching_no_matches():
    """A (3, 2) validation over datasets without temporal overlap is empty."""
    expected = {}

    datasets = setup_two_without_overlap()
    dm = DataManager(
        datasets,
        "DS1",
        read_ts_names={name: "read" for name in ["DS1", "DS2", "DS3"]},
    )

    process = Validation(
        dm,
        "DS1",
        temporal_matcher=temporal_matchers.BasicTemporalMatching(
            window=1 / 24.0
        ).combinatory_matcher,
        scaling="lin_cdf_match",
        metrics_calculators={
            (3, 2): metrics_calculators.BasicMetrics(
                other_name="k1"
            ).calc_metrics
        },
    )

    # no job may produce any result keys
    for job in process.get_processing_jobs():
        results = process.calc(*job)
        assert sorted(list(results)) == sorted(list(expected))
def test_validation_error_n2_k2():
    """Requesting n=2 while three datasets are configured raises ValueError."""
    datasets = setup_TestDatasets()
    dm = DataManager(
        datasets,
        "DS1",
        read_ts_names={ds: "read" for ds in ["DS1", "DS2", "DS3"]},
    )

    # n less than the number of datasets is no longer allowed
    with pytest.raises(ValueError):
        Validation(
            dm,
            "DS1",
            temporal_matcher=temporal_matchers.BasicTemporalMatching(
                window=1 / 24.0
            ).combinatory_matcher,
            scaling="lin_cdf_match",
            metrics_calculators={
                (2, 2): metrics_calculators.BasicMetrics(
                    other_name="k1"
                ).calc_metrics
            },
        )
def test_validation_n3_k2_temporal_matching_no_matches2():
    """Only the two overlapping DS1/DS3 pairs appear in the result keys."""

    def _perfect_pair_result():
        # Matched series are identical, hence perfect correlation and zero
        # error metrics at gpi 4 (lon/lat 4.0).
        return {
            "n_obs": np.array([1000], dtype=np.int32),
            "tau": np.array([np.nan], dtype=np.float32),
            "gpi": np.array([4], dtype=np.int32),
            "RMSD": np.array([0.0], dtype=np.float32),
            "lon": np.array([4.0]),
            "p_tau": np.array([np.nan], dtype=np.float32),
            "BIAS": np.array([0.0], dtype=np.float32),
            "p_rho": np.array([0.0], dtype=np.float32),
            "rho": np.array([1.0], dtype=np.float32),
            "lat": np.array([4.0]),
            "R": np.array([1.0], dtype=np.float32),
            "p_R": np.array([0.0], dtype=np.float32),
        }

    expected = {
        (("DS1", "x"), ("DS3", "y")): _perfect_pair_result(),
        (("DS1", "x"), ("DS3", "x")): _perfect_pair_result(),
    }

    datasets = setup_three_with_two_overlapping()
    dm = DataManager(
        datasets,
        "DS1",
        read_ts_names={name: "read" for name in ["DS1", "DS2", "DS3"]},
    )

    process = Validation(
        dm,
        "DS1",
        temporal_matcher=temporal_matchers.BasicTemporalMatching(
            window=1 / 24.0
        ).combinatory_matcher,
        scaling="lin_cdf_match",
        metrics_calculators={
            (3, 2): metrics_calculators.BasicMetrics(
                other_name="k1"
            ).calc_metrics
        },
    )

    for job in process.get_processing_jobs():
        results = process.calc(*job)
        assert sorted(list(results)) == sorted(list(expected))
def test_validation_n3_k2():
    """A full (3, 2) validation over the test datasets yields perfect scores."""

    def _perfect_metrics():
        # All matched series are identical, so correlations are 1 and error
        # metrics are 0 at gpi 4 (lon/lat 4.0).
        return {
            'n_obs': np.array([1000], dtype=np.int32),
            'tau': np.array([np.nan], dtype=np.float32),
            'gpi': np.array([4], dtype=np.int32),
            'RMSD': np.array([0.], dtype=np.float32),
            'lon': np.array([4.]),
            'p_tau': np.array([np.nan], dtype=np.float32),
            'BIAS': np.array([0.], dtype=np.float32),
            'p_rho': np.array([0.], dtype=np.float32),
            'rho': np.array([1.], dtype=np.float32),
            'lat': np.array([4.]),
            'R': np.array([1.], dtype=np.float32),
            'p_R': np.array([0.], dtype=np.float32)}

    expected = {
        (('DS1', 'x'), ('DS3', 'y')): _perfect_metrics(),
        (('DS1', 'x'), ('DS2', 'y')): _perfect_metrics(),
        (('DS1', 'x'), ('DS3', 'x')): _perfect_metrics()}

    datasets = setup_TestDatasets()

    process = Validation(
        datasets, 'DS1',
        temporal_matcher=temporal_matchers.BasicTemporalMatching(
            window=1 / 24.0).combinatory_matcher,
        scaling='lin_cdf_match',
        metrics_calculators={
            (3, 2): metrics_calculators.BasicMetrics(
                other_name='k1').calc_metrics})

    for job in process.get_processing_jobs():
        results = process.calc(*job)
        assert sorted(list(results)) == sorted(list(expected))
def test_validation_n3_k2_masking_no_data_remains():
    """When the masking datasets remove all data, no results are produced."""
    datasets = setup_TestDatasets()

    # setup masking datasets
    grid = grids.CellGrid(np.array([1, 2, 3, 4]), np.array([1, 2, 3, 4]),
                          np.array([4, 4, 2, 1]), gpis=np.array([1, 2, 3, 4]))

    mds1 = GriddedTsBase("", grid, MaskingTestDataset)
    mds2 = GriddedTsBase("", grid, MaskingTestDataset)

    # 'limit': 1000 on masking2 masks out every reference observation
    mds = {
        'masking1': {
            'class': mds1,
            'columns': ['x'],
            'args': [],
            'kwargs': {'limit': 500},
            'use_lut': False,
            'grids_compatible': True},
        'masking2': {
            'class': mds2,
            'columns': ['x'],
            'args': [],
            'kwargs': {'limit': 1000},
            'use_lut': False,
            'grids_compatible': True}
    }

    process = Validation(
        datasets, 'DS1',
        temporal_matcher=temporal_matchers.BasicTemporalMatching(
            window=1 / 24.0).combinatory_matcher,
        scaling='lin_cdf_match',
        metrics_calculators={
            (3, 2): metrics_calculators.BasicMetrics(
                other_name='k1').calc_metrics},
        masking_datasets=mds)

    gpi_info = (1, 1, 1)
    ref_df = datasets['DS1']['class'].read(1)
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=DeprecationWarning)
        new_ref_df = process.mask_dataset(ref_df, gpi_info)
    assert len(new_ref_df) == 0
    nptest.assert_allclose(new_ref_df.x.values, np.arange(1000, 1000))

    jobs = process.get_processing_jobs()
    for job in jobs:
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=DeprecationWarning)
            results = process.calc(*job)
        # BUGFIX: ``tst`` was a list but is looked up by key below
        # (``tst[tst_key]``); use an empty dict so it matches the mapping
        # type of ``results`` and the comparison loop stays well-typed.
        tst = {}
        assert sorted(list(results)) == sorted(list(tst))
        for key, tst_key in zip(sorted(results), sorted(tst)):
            nptest.assert_almost_equal(results[key]['n_obs'],
                                       tst[tst_key]['n_obs'])
def test_validation_error_n2_k2():
    """Validation must raise ValueError for n=2 with three configured datasets."""
    datasets = setup_TestDatasets()
    dm = DataManager(datasets, 'DS1',
                     read_ts_names={d: 'read' for d in ['DS1', 'DS2', 'DS3']})

    # n less than number of datasets is no longer allowed
    with pytest.raises(ValueError):
        # FIX: dropped the unused ``process =`` assignment — the constructor
        # call itself is what must raise inside the ``raises`` context.
        Validation(
            dm, 'DS1',
            temporal_matcher=temporal_matchers.BasicTemporalMatching(
                window=1 / 24.0).combinatory_matcher,
            scaling='lin_cdf_match',
            metrics_calculators={
                (2, 2): metrics_calculators.BasicMetrics(
                    other_name='k1').calc_metrics})
def test_validation_n2_k2_temporal_matching_no_matches():
    """Two datasets without temporal overlap produce an empty result set."""
    expected = {}

    datasets = setup_two_without_overlap()

    process = Validation(
        datasets, 'DS1',
        temporal_matcher=temporal_matchers.BasicTemporalMatching(
            window=1 / 24.0).combinatory_matcher,
        scaling='lin_cdf_match',
        metrics_calculators={
            (2, 2): metrics_calculators.BasicMetrics(
                other_name='k1').calc_metrics})

    for job in process.get_processing_jobs():
        results = process.calc(*job)
        assert sorted(list(results)) == sorted(list(expected))
} } }
# NOTE(review): the stray closing braces above terminate a ``datasets``
# dictionary literal whose opening lies outside this chunk (this section is
# a notebook export fragment) — confirm against the complete source.

# The datasets dictionary contains all the information about the datasets to read. The `class` is the dataset class
# to use which we have already initialized. The `columns` key describes which columns of the dataset interest us for
# validation. This a mandatory field telling the framework which other columns to ignore. In this case the columns
# `soil moisture_flag` and `soil moisture_orig_flag` will be ignored by the ISMN reader. We can also specify
# additional keywords that should be given to the `read_ts` method of the dataset reader. In this case we want the
# ASCAT reader to mask the ASCAT soil moisture using the included frozen and snow probabilities as well as the SSF.
# There are also other keys that can be used here. Please see the documentation for explanations.

# In[8]:

# Validate over 2007--2014, using ISMN as the spatial reference dataset and
# ASCAT as the temporal and scaling reference.
period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]

basic_metrics = metrics_calculators.BasicMetrics(other_name='k1')

process = Validation(datasets, 'ISMN',
                     temporal_ref='ASCAT',
                     scaling='lin_cdf_match',
                     scaling_ref='ASCAT',
                     metrics_calculators={(2, 2): basic_metrics.calc_metrics},
                     period=period)

# During the initialization of the Validation class we can also tell it other things that it needs to know. In this
# case it uses the datasets we have specified earlier. The spatial reference is the `'ISMN'` dataset which is the
# second argument. The 'metrics_calculators' argument looks a little bit strange so let's look at it in more detail.
#
# It is a dictionary with a tuple as the key and a function as the value. The key tuple `(n, k)` has the following
# meaning: `n` datasets are temporally matched together and then given in sets of `k` columns to the metric
def test_ascat_ismn_validation():
    """
    Test processing framework with some ISMN and ASCAT sample data
    """
    # locate the bundled ASCAT sample data, grid and static layers relative
    # to this test file
    ascat_data_folder = os.path.join(os.path.dirname(__file__), '..',
                                     'test-data', 'sat', 'ascat', 'netcdf',
                                     '55R22')
    ascat_grid_folder = os.path.join(os.path.dirname(__file__), '..',
                                     'test-data', 'sat', 'ascat', 'netcdf',
                                     'grid')
    static_layers_folder = os.path.join(os.path.dirname(__file__), '..',
                                        'test-data', 'sat', 'h_saf',
                                        'static_layer')

    ascat_reader = AscatSsmCdr(ascat_data_folder, ascat_grid_folder,
                               static_layer_path=static_layers_folder)
    ascat_reader.read_bulk = True

    # Initialize ISMN reader
    ismn_data_folder = os.path.join(os.path.dirname(__file__), '..',
                                    'test-data', 'ismn', 'multinetwork',
                                    'header_values')
    ismn_reader = ISMN_Interface(ismn_data_folder)

    # one job per shallow (0-0.1 m) soil-moisture sensor in the sample data
    jobs = []
    ids = ismn_reader.get_dataset_ids(
        variable='soil moisture', min_depth=0, max_depth=0.1)
    for idx in ids:
        metadata = ismn_reader.metadata[idx]
        jobs.append((idx, metadata['longitude'], metadata['latitude']))

    # Create the variable ***save_path*** which is a string representing the
    # path where the results will be saved. **DO NOT CHANGE** the name
    # ***save_path*** because it will be searched during the parallel
    # processing!
    save_path = tempfile.mkdtemp()

    # Create the validation object.
    datasets = {
        'ISMN': {
            'class': ismn_reader,
            'columns': ['soil moisture']
        },
        'ASCAT': {
            'class': ascat_reader,
            'columns': ['sm'],
            'kwargs': {'mask_frozen_prob': 80,
                       'mask_snow_prob': 80,
                       'mask_ssf': True}
        }}

    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]

    process = Validation(
        datasets, 'ISMN',
        temporal_ref='ASCAT',
        scaling='lin_cdf_match',
        scaling_ref='ASCAT',
        metrics_calculators={
            (2, 2): metrics_calculators.BasicMetrics(
                other_name='k1').calc_metrics},
        period=period)

    for job in jobs:
        results = process.calc(*job)
        netcdf_results_manager(results, save_path)

    results_fname = os.path.join(
        save_path, 'ASCAT.sm_with_ISMN.soil moisture.nc')

    # NOTE(review): the expected values below are regression constants,
    # presumably captured from a trusted reference run — confirm whenever the
    # sample data or upstream readers change.
    vars_should = [u'n_obs', u'tau', u'gpi', u'RMSD', u'lon', u'p_tau',
                   u'BIAS', u'p_rho', u'rho', u'lat', u'R', u'p_R']
    n_obs_should = [384, 357, 482, 141, 251, 1927, 1887, 1652]
    rho_should = np.array([0.70022893, 0.53934574, 0.69356072, 0.84189808,
                           0.74206454, 0.30299741, 0.53143877, 0.62204134],
                          dtype=np.float32)
    rmsd_should = np.array([7.72966719, 11.58347607, 14.57700157, 13.06224251,
                            12.90389824, 14.24668026, 21.19682884, 17.3883934],
                           dtype=np.float32)

    # results are compared sorted because per-point ordering in the netCDF
    # file is not guaranteed
    with nc.Dataset(results_fname) as results:
        assert sorted(results.variables.keys()) == sorted(vars_should)
        assert sorted(results.variables['n_obs'][:].tolist()) == sorted(
            n_obs_should)
        nptest.assert_allclose(sorted(rho_should),
                               sorted(results.variables['rho'][:]),
                               rtol=1e-4)
        nptest.assert_allclose(sorted(rmsd_should),
                               sorted(results.variables['RMSD'][:]),
                               rtol=1e-4)
def test_validation_n3_k2_masking():
    """(3, 2) validation with masking datasets limiting the reference data."""
    # test result for one gpi in a cell
    tst_results_one = {
        (('DS1', 'x'), ('DS3', 'y')): {
            'n_obs': np.array([250], dtype=np.int32)},
        (('DS1', 'x'), ('DS2', 'y')): {
            'n_obs': np.array([250], dtype=np.int32)},
        (('DS1', 'x'), ('DS3', 'x')): {
            'n_obs': np.array([250], dtype=np.int32)}}

    # test result for two gpis in a cell
    tst_results_two = {
        (('DS1', 'x'), ('DS3', 'y')): {
            'n_obs': np.array([250, 250], dtype=np.int32)},
        (('DS1', 'x'), ('DS2', 'y')): {
            'n_obs': np.array([250, 250], dtype=np.int32)},
        (('DS1', 'x'), ('DS3', 'x')): {
            'n_obs': np.array([250, 250], dtype=np.int32)}}

    # cell 4 in this example has two gpis so it returns different results.
    # BUGFIX: the dict literal previously repeated the key ``1`` twice
    # ({1: ..., 1: ..., 2: ...}); the mapping is keyed by the number of gpis
    # in the processed job.
    tst_results = {1: tst_results_one, 2: tst_results_two}

    datasets = setup_TestDatasets()

    # setup masking datasets
    grid = grids.CellGrid(np.array([1, 2, 3, 4]), np.array([1, 2, 3, 4]),
                          np.array([4, 4, 2, 1]), gpis=np.array([1, 2, 3, 4]))

    mds1 = GriddedTsBase("", grid, MaskingTestDataset)
    mds2 = GriddedTsBase("", grid, MaskingTestDataset)

    mds = {
        'masking1': {
            'class': mds1,
            'columns': ['x'],
            'args': [],
            'kwargs': {'limit': 500},
            'use_lut': False,
            'grids_compatible': True},
        'masking2': {
            'class': mds2,
            'columns': ['x'],
            'args': [],
            'kwargs': {'limit': 750},
            'use_lut': False,
            'grids_compatible': True}
    }

    process = Validation(
        datasets, 'DS1',
        temporal_matcher=temporal_matchers.BasicTemporalMatching(
            window=1 / 24.0).combinatory_matcher,
        scaling='lin_cdf_match',
        metrics_calculators={
            (3, 2): metrics_calculators.BasicMetrics(
                other_name='k1').calc_metrics},
        masking_datasets=mds)

    # masking with limits 500 and 750 leaves observations 750..999
    gpi_info = (1, 1, 1)
    ref_df = datasets['DS1']['class'].read_ts(1)
    new_ref_df = process.mask_dataset(ref_df, gpi_info)
    assert len(new_ref_df) == 250
    nptest.assert_allclose(new_ref_df.x.values, np.arange(750, 1000))

    jobs = process.get_processing_jobs()
    for job in jobs:
        results = process.calc(*job)
        tst = tst_results[len(job[0])]
        assert sorted(list(results)) == sorted(list(tst))
        for key, tst_key in zip(sorted(results), sorted(tst)):
            nptest.assert_almost_equal(results[key]['n_obs'],
                                       tst[tst_key]['n_obs'])
def test_ascat_ismn_validation(ascat_reader, ismn_reader):
    """
    Test processing framework with some ISMN and ASCAT sample data
    """
    # one job per shallow (0-0.1 m) soil-moisture sensor in the sample data
    sensor_ids = ismn_reader.get_dataset_ids(variable="soil moisture",
                                             min_depth=0,
                                             max_depth=0.1)
    jobs = []
    for sensor_id in sensor_ids:
        meta = ismn_reader.metadata[sensor_id]
        jobs.append((sensor_id, meta["longitude"], meta["latitude"]))

    # Create the variable ***save_path*** which is a string representing the
    # path where the results will be saved. **DO NOT CHANGE** the name
    # ***save_path*** because it will be searched during the parallel
    # processing!
    save_path = tempfile.mkdtemp()

    # Create the validation object.
    datasets = {
        "ISMN": {"class": ismn_reader, "columns": ["soil moisture"]},
        "ASCAT": {
            "class": ascat_reader,
            "columns": ["sm"],
            "kwargs": {
                "mask_frozen_prob": 80,
                "mask_snow_prob": 80,
                "mask_ssf": True,
            },
        },
    }
    read_ts_names = {"ASCAT": "read", "ISMN": "read_ts"}
    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]
    datasets = DataManager(datasets, "ISMN", period,
                           read_ts_names=read_ts_names)

    process = Validation(
        datasets,
        "ISMN",
        temporal_ref="ASCAT",
        scaling="lin_cdf_match",
        scaling_ref="ASCAT",
        metrics_calculators={
            (2, 2): metrics_calculators.BasicMetrics(
                other_name="k1").calc_metrics
        },
        period=period,
    )

    for job in jobs:
        results = process.calc(*job)
        netcdf_results_manager(results, save_path)

    results_fname = os.path.join(save_path,
                                 "ASCAT.sm_with_ISMN.soil moisture.nc")

    # regression targets for a subset of the result variables
    target_vars = {
        "n_obs": [357, 384, 1646, 1875, 1915, 467, 141, 251],
        "rho": np.array([
            0.53934574, 0.7002289, 0.62200236, 0.53647155,
            0.30413666, 0.6740655, 0.8418981, 0.74206454
        ], dtype=np.float32),
        "RMSD": np.array([
            11.583476, 7.729667, 17.441547, 21.125721,
            14.31557, 14.187225, 13.0622425, 12.903898
        ], dtype=np.float32)
    }

    check_results(
        filename=results_fname,
        target_vars=target_vars,
    )
def test_ascat_ismn_validation():
    """
    Test processing framework with some ISMN and ASCAT sample data
    """
    # locate the bundled ASCAT sample data and grid relative to this file
    ascat_data_folder = os.path.join(os.path.dirname(__file__), '..',
                                     'test-data', 'sat', 'ascat', 'netcdf',
                                     '55R22')
    ascat_grid_folder = os.path.join(os.path.dirname(__file__), '..',
                                     'test-data', 'sat', 'ascat', 'netcdf',
                                     'grid')

    ascat_reader = AscatH25_SSM(ascat_data_folder, ascat_grid_folder)
    ascat_reader.read_bulk = True
    # NOTE(review): calls a private reader API — confirm it is still required
    # by the installed ascat package version.
    ascat_reader._load_grid_info()

    # Initialize ISMN reader
    ismn_data_folder = os.path.join(os.path.dirname(__file__), '..',
                                    'test-data', 'ismn', 'multinetwork',
                                    'header_values')
    ismn_reader = ISMN_Interface(ismn_data_folder)

    # one job per shallow (0-0.1 m) soil-moisture sensor in the sample data
    jobs = []
    ids = ismn_reader.get_dataset_ids(variable='soil moisture',
                                      min_depth=0,
                                      max_depth=0.1)
    for idx in ids:
        metadata = ismn_reader.metadata[idx]
        jobs.append((idx, metadata['longitude'], metadata['latitude']))

    # Create the variable ***save_path*** which is a string representing the
    # path where the results will be saved. **DO NOT CHANGE** the name
    # ***save_path*** because it will be searched during the parallel
    # processing!
    save_path = tempfile.mkdtemp()

    # Create the validation object.
    datasets = {
        'ISMN': {
            'class': ismn_reader,
            'columns': ['soil moisture']
        },
        'ASCAT': {
            'class': ascat_reader,
            'columns': ['sm'],
            'kwargs': {
                'mask_frozen_prob': 80,
                'mask_snow_prob': 80,
                'mask_ssf': True
            }
        }
    }

    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]

    process = Validation(
        datasets, 'ISMN',
        temporal_ref='ASCAT',
        scaling='lin_cdf_match',
        scaling_ref='ASCAT',
        metrics_calculators={
            (2, 2): metrics_calculators.BasicMetrics(
                other_name='k1').calc_metrics
        },
        period=period)

    for job in jobs:
        results = process.calc(*job)
        netcdf_results_manager(results, save_path)

    results_fname = os.path.join(save_path,
                                 'ASCAT.sm_with_ISMN.soil moisture.nc')

    # NOTE(review): regression constants, presumably from a trusted reference
    # run — confirm whenever the sample data or upstream readers change.
    vars_should = [
        u'n_obs', u'tau', u'gpi', u'RMSD', u'lon', u'p_tau', u'BIAS',
        u'p_rho', u'rho', u'lat', u'R', u'p_R'
    ]
    n_obs_should = [360, 385, 1644, 1881, 1927, 479, 140, 251]
    rho_should = np.array([
        0.546187, 0.717398, 0.620892, 0.532465, 0.302997, 0.694713,
        0.840592, 0.742065
    ], dtype=np.float32)
    rmsd_should = np.array([
        11.536263, 7.545650, 17.451935, 21.193714, 14.246680, 14.494674,
        13.173215, 12.903898
    ], dtype=np.float32)

    # results are compared sorted because per-point ordering in the netCDF
    # file is not guaranteed
    with nc.Dataset(results_fname) as results:
        assert sorted(results.variables.keys()) == sorted(vars_should)
        assert sorted(
            results.variables['n_obs'][:].tolist()) == sorted(n_obs_should)
        nptest.assert_allclose(sorted(rho_should),
                               sorted(results.variables['rho'][:]),
                               rtol=1e-4)
        nptest.assert_allclose(sorted(rmsd_should),
                               sorted(results.variables['RMSD'][:]),
                               rtol=1e-4)
def test_validation_n3_k2_masking():
    """(3, 2) validation with masking datasets limiting the reference data."""
    # test result for one gpi in a cell
    tst_results_one = {
        (("DS1", "x"), ("DS3", "y")): {
            "n_obs": np.array([250], dtype=np.int32)
        },
        (("DS1", "x"), ("DS2", "y")): {
            "n_obs": np.array([250], dtype=np.int32)
        },
        (("DS1", "x"), ("DS3", "x")): {
            "n_obs": np.array([250], dtype=np.int32)
        },
        (("DS2", "y"), ("DS3", "x")): {
            "n_obs": np.array([250], dtype=np.int32)
        },
        (("DS2", "y"), ("DS3", "y")): {
            "n_obs": np.array([250], dtype=np.int32)
        },
    }

    # test result for two gpis in a cell
    tst_results_two = {
        (("DS1", "x"), ("DS3", "y")): {
            "n_obs": np.array([250, 250], dtype=np.int32)
        },
        (("DS1", "x"), ("DS2", "y")): {
            "n_obs": np.array([250, 250], dtype=np.int32)
        },
        (("DS1", "x"), ("DS3", "x")): {
            "n_obs": np.array([250, 250], dtype=np.int32)
        },
        (("DS2", "y"), ("DS3", "x")): {
            "n_obs": np.array([250, 250], dtype=np.int32)
        },
        (("DS2", "y"), ("DS3", "y")): {
            "n_obs": np.array([250, 250], dtype=np.int32)
        },
    }

    # cell 4 in this example has two gpis so it returns different results.
    # BUGFIX: the dict literal previously repeated the key ``1`` twice
    # ({1: ..., 1: ..., 2: ...}); the mapping is keyed by the number of gpis
    # in the processed job.
    tst_results = {1: tst_results_one, 2: tst_results_two}

    datasets = setup_TestDatasets()

    # setup masking datasets
    grid = grids.CellGrid(
        np.array([1, 2, 3, 4]),
        np.array([1, 2, 3, 4]),
        np.array([4, 4, 2, 1]),
        gpis=np.array([1, 2, 3, 4]),
    )

    mds1 = GriddedTsBase("", grid, MaskingTestDataset)
    mds2 = GriddedTsBase("", grid, MaskingTestDataset)

    mds = {
        "masking1": {
            "class": mds1,
            "columns": ["x"],
            "args": [],
            "kwargs": {"limit": 500},
            "use_lut": False,
            "grids_compatible": True,
        },
        "masking2": {
            "class": mds2,
            "columns": ["x"],
            "args": [],
            "kwargs": {"limit": 750},
            "use_lut": False,
            "grids_compatible": True,
        },
    }

    process = Validation(
        datasets,
        "DS1",
        temporal_matcher=temporal_matchers.BasicTemporalMatching(
            window=1 / 24.0
        ).combinatory_matcher,
        scaling="lin_cdf_match",
        metrics_calculators={
            (3, 2): metrics_calculators.BasicMetrics(
                other_name="k1"
            ).calc_metrics
        },
        masking_datasets=mds,
    )

    # masking with limits 500 and 750 leaves observations 750..999
    gpi_info = (1, 1, 1)
    ref_df = datasets["DS1"]["class"].read(1)
    with warnings.catch_warnings():
        warnings.simplefilter(
            "ignore", category=DeprecationWarning
        )  # read_ts is hard coded when using mask_data
        new_ref_df = process.mask_dataset(ref_df, gpi_info)
    assert len(new_ref_df) == 250
    nptest.assert_allclose(new_ref_df.x.values, np.arange(750, 1000))

    jobs = process.get_processing_jobs()
    for job in jobs:
        with warnings.catch_warnings():
            # most warnings here are caused by the read_ts function that cannot
            # be changed when using a masking data set
            warnings.simplefilter("ignore", category=DeprecationWarning)
            results = process.calc(*job)
        tst = tst_results[len(job[0])]
        assert sorted(list(results)) == sorted(list(tst))
        for key, tst_key in zip(sorted(results), sorted(tst)):
            nptest.assert_almost_equal(
                results[key]["n_obs"], tst[tst_key]["n_obs"]
            )
def test_validation_n3_k2_masking_no_data_remains():
    """If the masking datasets remove every observation, no results appear."""
    datasets = setup_TestDatasets()

    # setup masking datasets
    grid = grids.CellGrid(
        np.array([1, 2, 3, 4]),
        np.array([1, 2, 3, 4]),
        np.array([4, 4, 2, 1]),
        gpis=np.array([1, 2, 3, 4]),
    )

    mds1 = GriddedTsBase("", grid, MaskingTestDataset)
    mds2 = GriddedTsBase("", grid, MaskingTestDataset)

    # 'limit': 1000 on masking2 masks out every reference observation
    mds = {
        "masking1": {
            "class": mds1,
            "columns": ["x"],
            "args": [],
            "kwargs": {"limit": 500},
            "use_lut": False,
            "grids_compatible": True,
        },
        "masking2": {
            "class": mds2,
            "columns": ["x"],
            "args": [],
            "kwargs": {"limit": 1000},
            "use_lut": False,
            "grids_compatible": True,
        },
    }

    process = Validation(
        datasets,
        "DS1",
        temporal_matcher=temporal_matchers.BasicTemporalMatching(
            window=1 / 24.0
        ).combinatory_matcher,
        scaling="lin_cdf_match",
        metrics_calculators={
            (3, 2): metrics_calculators.BasicMetrics(
                other_name="k1"
            ).calc_metrics
        },
        masking_datasets=mds,
    )

    gpi_info = (1, 1, 1)
    ref_df = datasets["DS1"]["class"].read(1)
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        new_ref_df = process.mask_dataset(ref_df, gpi_info)
    assert len(new_ref_df) == 0
    nptest.assert_allclose(new_ref_df.x.values, np.arange(1000, 1000))

    jobs = process.get_processing_jobs()
    for job in jobs:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=DeprecationWarning)
            results = process.calc(*job)
        # BUGFIX: ``tst`` was a list but is looked up by key below
        # (``tst[tst_key]``); use an empty dict so it matches the mapping
        # type of ``results`` and the comparison loop stays well-typed.
        tst = {}
        assert sorted(list(results)) == sorted(list(tst))
        for key, tst_key in zip(sorted(results), sorted(tst)):
            nptest.assert_almost_equal(
                results[key]["n_obs"], tst[tst_key]["n_obs"]
            )
def test_ascat_ismn_validation_metadata(ascat_reader, ismn_reader):
    """
    Test processing framework with some ISMN and ASCAT sample data
    """
    jobs = []

    # one job per shallow (0-0.1 m) soil-moisture sensor in the sample data
    ids = ismn_reader.get_dataset_ids(variable="soil moisture",
                                      min_depth=0,
                                      max_depth=0.1)

    # template of per-point metadata fields attached to every result record
    metadata_dict_template = {
        "network": np.array(["None"], dtype="U256"),
        "station": np.array(["None"], dtype="U256"),
        "landcover": np.float32([np.nan]),
        "climate": np.array(["None"], dtype="U4"),
    }

    for idx in ids:
        metadata = ismn_reader.metadata[idx]
        metadata_dict = [{
            "network": metadata["network"],
            "station": metadata["station"],
            "landcover": metadata["landcover_2010"],
            "climate": metadata["climate"],
        }]
        jobs.append(
            (idx, metadata["longitude"], metadata["latitude"], metadata_dict))

    # Create the variable ***save_path*** which is a string representing the
    # path where the results will be saved. **DO NOT CHANGE** the name
    # ***save_path*** because it will be searched during the parallel
    # processing!
    save_path = tempfile.mkdtemp()

    # Create the validation object.
    datasets = {
        "ISMN": {
            "class": ismn_reader,
            "columns": ["soil moisture"],
        },
        "ASCAT": {
            "class": ascat_reader,
            "columns": ["sm"],
            "kwargs": {
                "mask_frozen_prob": 80,
                "mask_snow_prob": 80,
                "mask_ssf": True,
            },
        },
    }

    read_ts_names = {"ASCAT": "read", "ISMN": "read_ts"}
    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]
    datasets = DataManager(datasets, "ISMN", period,
                           read_ts_names=read_ts_names)

    process = Validation(
        datasets,
        "ISMN",
        temporal_ref="ASCAT",
        scaling="lin_cdf_match",
        scaling_ref="ASCAT",
        metrics_calculators={
            (2, 2): metrics_calculators.BasicMetrics(
                other_name="k1",
                metadata_template=metadata_dict_template).calc_metrics
        },
        period=period,
    )

    for job in jobs:
        results = process.calc(*job)
        netcdf_results_manager(results, save_path)

    results_fname = os.path.join(save_path,
                                 "ASCAT.sm_with_ISMN.soil moisture.nc")

    # NOTE(review): regression constants, presumably from a trusted reference
    # run — confirm whenever the sample data or upstream readers change.
    target_vars = {
        "n_obs": [357, 384, 1646, 1875, 1915, 467, 141, 251],
        "rho": np.array([
            0.53934574, 0.7002289, 0.62200236, 0.53647155, 0.30413666,
            0.6740655, 0.8418981, 0.74206454,
        ], dtype=np.float32),
        "RMSD": np.array([
            11.583476, 7.729667, 17.441547, 21.125721, 14.31557, 14.187225,
            13.0622425, 12.903898,
        ], dtype=np.float32),
        "network": np.array(
            [
                "MAQU",
                "MAQU",
                "SCAN",
                "SCAN",
                "SCAN",
                "SOILSCAPE",
                "SOILSCAPE",
                "SOILSCAPE",
            ],
            dtype="U256",
        )
    }

    vars_should = [
        'BIAS', 'R', 'RMSD', '_row_size', 'climate', 'gpi', 'idx',
        'landcover', 'lat', 'lon', 'n_obs', 'network', 'p_R', 'p_rho',
        'p_tau', 'rho', 'station', 'tau', 'time'
    ]

    check_results(filename=results_fname,
                  target_vars=target_vars,
                  variables=vars_should)
def test_validation_with_averager(ascat_reader, ismn_reader):
    """
    Test processing framework with averaging module. ASCAT and ISMN data are
    used here with no geographical considerations (the lut is provided more
    upstream and contains this information already)
    """
    # unwrap any adapter layers around the ascat reader
    while hasattr(ascat_reader, 'cls'):
        ascat_reader = ascat_reader.cls

    # lookup table between the ascat and ismn points - not geographically correct
    upscaling_lut = {
        "ISMN": {
            1814367: [(0, 102.1333, 33.8833), (1, 102.1333, 33.6666)],
            1803695: [(2, -86.55, 34.783), (3, -97.083, 37.133),
                      (4, -105.417, 34.25)],
            1856312: [(5, -120.9675, 38.43003), (6, -120.78559, 38.14956),
                      (7, -120.80639, 38.17353)]
        }
    }
    gpis = (1814367, 1803695, 1856312)
    coords = [ascat_reader.grid.gpi2lonlat(gpi) for gpi in gpis]
    lons = [c[0] for c in coords]
    lats = [c[1] for c in coords]
    jobs = [(gpis, lons, lats)]

    # Create the variable ***save_path*** which is a string representing the
    # path where the results will be saved. **DO NOT CHANGE** the name
    # ***save_path*** because it will be searched during the parallel
    # processing!
    save_path = tempfile.mkdtemp()

    # Create the validation object.
    datasets = {
        "ASCAT": {
            "class": ascat_reader,
            "columns": ["sm"],
            "kwargs": {
                "mask_frozen_prob": 80,
                "mask_snow_prob": 80,
                "mask_ssf": True,
            }
        },
        "ISMN": {
            "class": ismn_reader,
            "columns": ["soil moisture"],
        },
    }
    read_ts_names = {"ASCAT": "read", "ISMN": "read_ts"}
    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]
    datasets = DataManager(
        datasets,
        "ASCAT",
        period,
        read_ts_names=read_ts_names,
        upscale_parms={
            "upscaling_method": "average",
            "temporal_stability": True,
            "upscaling_lut": upscaling_lut,
        },
    )

    process = Validation(
        datasets,
        "ASCAT",
        temporal_ref="ISMN",
        scaling="lin_cdf_match",
        scaling_ref="ISMN",
        metrics_calculators={
            (2, 2): metrics_calculators.BasicMetrics(
                other_name="k1").calc_metrics
        },
        period=period,
    )

    for job in jobs:
        results = process.calc(*job)
        netcdf_results_manager(results, save_path)

    results_fname = os.path.join(save_path,
                                 "ASCAT.sm_with_ISMN.soil moisture.nc")

    # regression targets for a subset of the result variables
    target_vars = {
        "n_obs": [764, 2392, 904],
        "rho": np.array([-0.012487, 0.255156, 0.635517], dtype=np.float32),
        "RMSD": np.array([0.056428, 0.056508, 0.116294], dtype=np.float32),
        "R": np.array([-0.012335, 0.257671, 0.657239], dtype=np.float32)
    }

    check_results(
        filename=results_fname,
        target_vars=target_vars,
    )
def test_ascat_ismn_validation_metadata(ascat_reader):
    """
    Test processing framework with some ISMN and ASCAT sample data
    """
    # Initialize ISMN reader
    ismn_data_folder = os.path.join(
        os.path.dirname(__file__),
        "..",
        "test-data",
        "ismn",
        "multinetwork",
        "header_values",
    )
    ismn_reader = ISMN_Interface(ismn_data_folder)

    jobs = []

    # one job per shallow (0-0.1 m) soil-moisture sensor in the sample data
    ids = ismn_reader.get_dataset_ids(
        variable="soil moisture", min_depth=0, max_depth=0.1
    )

    # template of per-point metadata fields attached to every result record
    metadata_dict_template = {
        "network": np.array(["None"], dtype="U256"),
        "station": np.array(["None"], dtype="U256"),
        "landcover": np.float32([np.nan]),
        "climate": np.array(["None"], dtype="U4"),
    }

    for idx in ids:
        metadata = ismn_reader.metadata[idx]
        metadata_dict = [
            {
                "network": metadata["network"],
                "station": metadata["station"],
                "landcover": metadata["landcover_2010"],
                "climate": metadata["climate"],
            }
        ]
        jobs.append(
            (idx, metadata["longitude"], metadata["latitude"], metadata_dict)
        )

    # Create the variable ***save_path*** which is a string representing the
    # path where the results will be saved. **DO NOT CHANGE** the name
    # ***save_path*** because it will be searched during the parallel
    # processing!
    save_path = tempfile.mkdtemp()

    # Create the validation object.
    datasets = {
        "ISMN": {
            "class": ismn_reader,
            "columns": ["soil moisture"],
        },
        "ASCAT": {
            "class": ascat_reader,
            "columns": ["sm"],
            "kwargs": {
                "mask_frozen_prob": 80,
                "mask_snow_prob": 80,
                "mask_ssf": True,
            },
        },
    }

    read_ts_names = {"ASCAT": "read", "ISMN": "read_ts"}
    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]
    datasets = DataManager(
        datasets, "ISMN", period, read_ts_names=read_ts_names
    )

    process = Validation(
        datasets,
        "ISMN",
        temporal_ref="ASCAT",
        scaling="lin_cdf_match",
        scaling_ref="ASCAT",
        metrics_calculators={
            (2, 2): metrics_calculators.BasicMetrics(
                other_name="k1", metadata_template=metadata_dict_template
            ).calc_metrics
        },
        period=period,
    )

    for job in jobs:
        results = process.calc(*job)
        netcdf_results_manager(results, save_path)

    results_fname = os.path.join(
        save_path, "ASCAT.sm_with_ISMN.soil moisture.nc"
    )

    vars_should = [
        u"n_obs",
        u"tau",
        u"gpi",
        u"RMSD",
        u"lon",
        u"p_tau",
        u"BIAS",
        u"p_rho",
        u"rho",
        u"lat",
        u"R",
        u"p_R",
        u"time",
        u"idx",
        u"_row_size",
    ]
    # the metadata fields are also stored as result variables
    for key, value in metadata_dict_template.items():
        vars_should.append(key)

    # NOTE(review): regression constants, presumably from a trusted reference
    # run — confirm whenever the sample data or upstream readers change.
    n_obs_should = [357, 384, 1646, 1875, 1915, 467, 141, 251]
    rho_should = np.array(
        [
            0.53934574,
            0.7002289,
            0.62200236,
            0.53647155,
            0.30413666,
            0.6740655,
            0.8418981,
            0.74206454,
        ],
        dtype=np.float32,
    )
    rmsd_should = np.array(
        [
            11.583476,
            7.729667,
            17.441547,
            21.125721,
            14.31557,
            14.187225,
            13.0622425,
            12.903898,
        ],
        dtype=np.float32,
    )
    network_should = np.array(
        [
            "MAQU",
            "MAQU",
            "SCAN",
            "SCAN",
            "SCAN",
            "SOILSCAPE",
            "SOILSCAPE",
            "SOILSCAPE",
        ],
        dtype="U256",
    )

    # results are compared sorted because per-point ordering in the netCDF
    # file is not guaranteed
    with nc.Dataset(results_fname, mode="r") as results:
        vars = results.variables.keys()
        n_obs = results.variables["n_obs"][:].tolist()
        rho = results.variables["rho"][:]
        rmsd = results.variables["RMSD"][:]
        network = results.variables["network"][:]

        assert sorted(vars) == sorted(vars_should)
        assert sorted(n_obs) == sorted(n_obs_should)
        nptest.assert_allclose(sorted(rho), sorted(rho_should), rtol=1e-4)
        nptest.assert_allclose(sorted(rmsd), sorted(rmsd_should), rtol=1e-4)
        nptest.assert_equal(sorted(network), sorted(network_should))