def test_validation_n3_k2():
    """Check which dataset/column pairs ``Validation.calc`` reports.

    A ``Validation`` is set up on the test datasets with ``'DS1'`` as the
    second positional argument and a ``BasicMetrics`` calculator registered
    under the key ``(3, 2)``. For every processing job, the sorted keys of
    the result must equal the sorted keys of the expected mapping. Only the
    keys are compared; the metric values are not checked here.
    """
    reference = ('DS1', 'x')
    others = [('DS3', 'y'), ('DS2', 'y'), ('DS3', 'x')]

    # Every pair carries the same metric values, so build the entries in a
    # comprehension (fresh arrays per pair, same insertion order as before).
    tst_results = {
        (reference, other): {
            'n_obs': np.array([1000], dtype=np.int32),
            'tau': np.array([np.nan], dtype=np.float32),
            'gpi': np.array([4], dtype=np.int32),
            'RMSD': np.array([0.], dtype=np.float32),
            'lon': np.array([4.]),
            'p_tau': np.array([np.nan], dtype=np.float32),
            'BIAS': np.array([0.], dtype=np.float32),
            'p_rho': np.array([0.], dtype=np.float32),
            'rho': np.array([1.], dtype=np.float32),
            'lat': np.array([4.]),
            'R': np.array([1.], dtype=np.float32),
            'p_R': np.array([0.], dtype=np.float32),
        }
        for other in others
    }

    datasets = setup_TestDatasets()

    matcher = temporal_matchers.BasicTemporalMatching(window=1 / 24.0)
    basic_metrics = metrics_calculators.BasicMetrics(other_name='k1')
    validation = Validation(
        datasets,
        'DS1',
        temporal_matcher=matcher.combinatory_matcher,
        scaling='lin_cdf_match',
        metrics_calculators={(3, 2): basic_metrics.calc_metrics},
    )

    # The expected keys do not change between jobs, so sort them once.
    expected_keys = sorted(tst_results)
    for job in validation.get_processing_jobs():
        results = validation.calc(*job)
        assert sorted(results) == expected_keys
def test_validation_n3_k2_masking_no_data_remains():
    """Check that validation yields no results when masking removes all data.

    Two masking datasets built from ``MaskingTestDataset`` are configured
    with ``limit`` values of 500 and 1000. The test first verifies that
    ``mask_dataset`` leaves an empty frame for the reference time series of
    gpi 1, then that ``calc`` produces an empty result for every processing
    job.
    """
    datasets = setup_TestDatasets()

    # setup masking datasets
    grid = grids.CellGrid(np.array([1, 2, 3, 4]), np.array([1, 2, 3, 4]),
                          np.array([4, 4, 2, 1]), gpis=np.array([1, 2, 3, 4]))

    mds1 = GriddedTsBase("", grid, MaskingTestDataset)
    mds2 = GriddedTsBase("", grid, MaskingTestDataset)

    mds = {
        'masking1': {
            'class': mds1,
            'columns': ['x'],
            'args': [],
            'kwargs': {'limit': 500},
            'use_lut': False,
            'grids_compatible': True},
        'masking2': {
            'class': mds2,
            'columns': ['x'],
            'args': [],
            'kwargs': {'limit': 1000},
            'use_lut': False,
            'grids_compatible': True}
    }

    process = Validation(
        datasets, 'DS1',
        temporal_matcher=temporal_matchers.BasicTemporalMatching(
            window=1 / 24.0).combinatory_matcher,
        scaling='lin_cdf_match',
        metrics_calculators={
            (3, 2): metrics_calculators.BasicMetrics(
                other_name='k1').calc_metrics},
        masking_datasets=mds)

    gpi_info = (1, 1, 1)
    ref_df = datasets['DS1']['class'].read_ts(1)
    new_ref_df = process.mask_dataset(ref_df, gpi_info)
    assert len(new_ref_df) == 0
    # np.arange(1000, 1000) is an empty array, so this only confirms that no
    # 'x' values survive the masking.
    nptest.assert_allclose(new_ref_df.x.values, np.arange(1000, 1000))

    for job in process.get_processing_jobs():
        results = process.calc(*job)
        # No data remains, so no result keys are expected. The former
        # per-key 'n_obs' comparison iterated over an empty expectation and
        # could never execute, so it has been removed.
        assert sorted(results) == []
def test_validation_n3_k2_masking():
    """Check ``n_obs`` of the validation results when masking is applied.

    Two masking datasets built from ``MaskingTestDataset`` are configured
    with ``limit`` values of 500 and 750. The test first verifies that
    ``mask_dataset`` keeps 250 rows of the reference series of gpi 1, with
    ``x`` values 750..999. It then checks, for every processing job, the
    result keys and the ``n_obs`` entry of each result.
    """
    # test result for one gpi in a cell
    tst_results_one = {
        (('DS1', 'x'), ('DS3', 'y')): {
            'n_obs': np.array([250], dtype=np.int32)},
        (('DS1', 'x'), ('DS2', 'y')): {
            'n_obs': np.array([250], dtype=np.int32)},
        (('DS1', 'x'), ('DS3', 'x')): {
            'n_obs': np.array([250], dtype=np.int32)}}

    # test result for two gpis in a cell
    tst_results_two = {
        (('DS1', 'x'), ('DS3', 'y')): {
            'n_obs': np.array([250, 250], dtype=np.int32)},
        (('DS1', 'x'), ('DS2', 'y')): {
            'n_obs': np.array([250, 250], dtype=np.int32)},
        (('DS1', 'x'), ('DS3', 'x')): {
            'n_obs': np.array([250, 250], dtype=np.int32)}}

    # cell 4 in this example has two gpis so it returns different results.
    # The mapping is looked up by len(job[0]) below; the key 1 used to be
    # listed twice in this literal, which silently overwrote the first entry.
    tst_results = {1: tst_results_one,
                   2: tst_results_two}

    datasets = setup_TestDatasets()

    # setup masking datasets
    grid = grids.CellGrid(np.array([1, 2, 3, 4]), np.array([1, 2, 3, 4]),
                          np.array([4, 4, 2, 1]), gpis=np.array([1, 2, 3, 4]))

    mds1 = GriddedTsBase("", grid, MaskingTestDataset)
    mds2 = GriddedTsBase("", grid, MaskingTestDataset)

    mds = {
        'masking1': {
            'class': mds1,
            'columns': ['x'],
            'args': [],
            'kwargs': {'limit': 500},
            'use_lut': False,
            'grids_compatible': True},
        'masking2': {
            'class': mds2,
            'columns': ['x'],
            'args': [],
            'kwargs': {'limit': 750},
            'use_lut': False,
            'grids_compatible': True}
    }

    process = Validation(
        datasets, 'DS1',
        temporal_matcher=temporal_matchers.BasicTemporalMatching(
            window=1 / 24.0).combinatory_matcher,
        scaling='lin_cdf_match',
        metrics_calculators={
            (3, 2): metrics_calculators.BasicMetrics(
                other_name='k1').calc_metrics},
        masking_datasets=mds)

    gpi_info = (1, 1, 1)
    ref_df = datasets['DS1']['class'].read_ts(1)
    new_ref_df = process.mask_dataset(ref_df, gpi_info)
    assert len(new_ref_df) == 250
    nptest.assert_allclose(new_ref_df.x.values, np.arange(750, 1000))

    for job in process.get_processing_jobs():
        results = process.calc(*job)
        tst = tst_results[len(job[0])]

        result_keys = sorted(results)
        assert result_keys == sorted(tst)
        # Both key lists were just asserted equal, so one loop over the
        # shared keys replaces the former lock-step zip of the two lists.
        for key in result_keys:
            nptest.assert_almost_equal(results[key]['n_obs'],
                                       tst[key]['n_obs'])
def test_DataManager_get_data():
    """Check the dataset names yielded by ``DataManager.get_data(1, 1, 1)``.

    Iterating the returned object must give exactly 'DS1', 'DS2' and 'DS3'
    once sorted.
    """
    manager = DataManager(setup_TestDatasets(), 'DS1')
    fetched = manager.get_data(1, 1, 1)
    expected_names = ['DS1', 'DS2', 'DS3']
    assert sorted(fetched) == expected_names