def test_regrid_is_skipped_if_grids_are_the_same():
    """Test that regridding is skipped if the grids are the same."""
    cube = _make_cube(lat=LAT_SPEC1, lon=LON_SPEC1)
    scheme = 'linear'

    # Regridding to the same spec returns the same cube.
    expected_same_cube = regrid(cube, target_grid='10x10', scheme=scheme)
    assert expected_same_cube is cube

    # Regridding to a different spec returns a different cube.
    expected_different_cube = regrid(cube, target_grid='5x5', scheme=scheme)
    assert expected_different_cube is not cube
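# Note: a minimal sketch of what a cell specification such as '10x10' is
# assumed to expand to, i.e. a global grid of 10x10 degree cells with the
# default half-cell offsets from the poles/meridian (the helper below is
# illustrative, not the actual implementation):
import numpy as np

def _global_cell_spec_grid_sketch(dlat, dlon):
    """Illustrative: cell centres of a global dlat x dlon degree grid."""
    lats = np.arange(-90 + dlat / 2, 90, dlat)  # e.g. -85, -75, ..., 85
    lons = np.arange(dlon / 2, 360, dlon)       # e.g. 5, 15, ..., 355
    return lats, lons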
def main(cfg):
    """Process data for use as input to the wflow hydrological model."""
    input_metadata = cfg['input_data'].values()

    for dataset, metadata in group_metadata(input_metadata, 'dataset').items():
        all_vars, provenance = get_input_cubes(metadata)

        # Interpolate the variables onto the DEM grid.
        # Read the target cube, which contains the target grid and the
        # target elevation.
        dem_path = Path(cfg['auxiliary_data_dir']) / cfg['dem_file']
        dem = load_dem(dem_path)
        check_dem(dem, cfg['region'])
        dem = extract_region(dem, **cfg['region'])

        logger.info("Processing variable precipitation_flux")
        pr_dem = regrid(all_vars['pr'], target_grid=dem, scheme='linear')

        logger.info("Processing variable temperature")
        tas_dem = regrid_temperature(all_vars['tas'], all_vars['orog'], dem)

        logger.info("Processing variable potential evapotranspiration")
        if 'evspsblpot' in all_vars:
            pet = all_vars['evspsblpot']
        else:
            logger.info("Potential evapotranspiration not available, deriving")
            pet = debruin_pet(
                tas=all_vars['tas'],
                psl=all_vars['psl'],
                rsds=all_vars['rsds'],
                rsdt=all_vars['rsdt'],
            )
        pet_dem = regrid(pet, target_grid=dem, scheme='linear')
        pet_dem.var_name = 'pet'

        logger.info("Converting units")
        # Divide by the density of water (1000 kg m-3) to convert from
        # kg m-2 s-1 to m s-1, then express as mm day-1.
        pet_dem.units = pet_dem.units / 'kg m-3'
        pet_dem.data = pet_dem.core_data() / 1000.
        pet_dem.convert_units('mm day-1')

        pr_dem.units = pr_dem.units / 'kg m-3'
        pr_dem.data = pr_dem.core_data() / 1000.
        pr_dem.convert_units('mm day-1')

        tas_dem.convert_units('degC')

        # Adjust the longitude coordinate to the wflow convention.
        for cube in [tas_dem, pet_dem, pr_dem]:
            cube.coord('longitude').points = (
                cube.coord('longitude').points + 180) % 360 - 180

        cubes = iris.cube.CubeList([pr_dem, tas_dem, pet_dem])
        save(cubes, dataset, provenance, cfg)
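# A quick check of the longitude adjustment used in main() above: the
# expression maps points from the [0, 360) convention onto [-180, 180),
# as wflow expects. For example:
import numpy as np

lon = np.array([0., 90., 180., 270., 359.])
print((lon + 180) % 360 - 180)  # -> [   0.   90. -180.  -90.   -1.]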
def _regrid_dataset(in_dir, var, cfg):
    """Regrid the original files.

    This function regrids each file and writes the result to disk with
    'regridded' inserted into the filename.
    """
    filelist = glob.glob(os.path.join(in_dir, var['file']))
    for infile in filelist:
        _, infile_tail = os.path.split(infile)
        outfile_tail = infile_tail.replace('c3s', 'c3s_regridded')
        outfile = os.path.join(cfg['work_dir'], outfile_tail)
        with catch_warnings():
            filterwarnings(
                action='ignore',
                # Full message:
                # UserWarning: Skipping global attribute 'long_name':
                # 'long_name' is not a permitted attribute
                message="Skipping global attribute 'long_name'",
                category=UserWarning,
                module='iris',
            )
            cube = iris.load_cube(
                infile,
                constraint=utils.var_name_constraint(var['raw']))
        cube = regrid(cube, cfg['custom']['regrid_resolution'], 'nearest')
        logger.info("Saving: %s", outfile)
        iris.save(cube, outfile)
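# `utils.var_name_constraint` is defined elsewhere in the cmorizer
# utilities; a minimal sketch of its assumed behaviour (selecting cubes by
# their raw netCDF variable name) would be:
import iris

def var_name_constraint_sketch(var_name):
    """Illustrative: constrain cube loading on the raw variable name."""
    return iris.Constraint(cube_func=lambda cube: cube.var_name == var_name)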
def _get_cube_for_year(year, in_dir, cfg):
    """Extract a cube containing one year from the raw file."""
    logger.info("Processing year %i", year)
    bin_files = glob.glob(
        os.path.join(in_dir, f"{cfg['binary_prefix']}{year}*.bin"))

    # Read the files of one year.
    cubes = iris.cube.CubeList()
    for bin_file in bin_files:
        raw_data = np.fromfile(bin_file, DTYPE,
                               N_LAT * N_LON).reshape(1, N_LAT, N_LON)
        raw_data = np.ma.masked_equal(raw_data, MISSING_VALUE)
        raw_data = raw_data.astype(np.float32)
        raw_data /= SCALE_FACTOR

        # Build the coordinates and the cube, regrid, and append it.
        coords = _get_coords(year, bin_file, cfg)
        cube = iris.cube.Cube(raw_data, dim_coords_and_dims=coords)
        if cfg.get('regrid'):
            cube = regrid(cube, cfg['regrid']['target_grid'],
                          cfg['regrid']['scheme'])
        cubes.append(cube)

    # Build the cube for a single year with monthly data
    # (the raw data has two values per month).
    cube = cubes.concatenate_cube()
    iris.coord_categorisation.add_month_number(cube, 'time')
    cube = cube.aggregated_by('month_number', iris.analysis.MEAN)

    # Cache the cube on disk to save memory.
    cached_path = os.path.join(in_dir, f'{year}.nc')
    iris.save(cube, cached_path)
    logger.info("Cached %s", cached_path)
    return cached_path
def test_regrid__unstructured_nearest(self):
    data = np.empty((1, 1))
    lons = iris.coords.DimCoord([1.6],
                                standard_name='longitude',
                                bounds=[[1, 2]],
                                units='degrees_east',
                                coord_system=self.cs)
    lats = iris.coords.DimCoord([1.6],
                                standard_name='latitude',
                                bounds=[[1, 2]],
                                units='degrees_north',
                                coord_system=self.cs)
    coords_spec = [(lats, 0), (lons, 1)]
    grid = iris.cube.Cube(data, dim_coords_and_dims=coords_spec)

    # Replace the 1d spatial coords with 2d spatial coords.
    lons = self.cube.coord('longitude')
    lats = self.cube.coord('latitude')
    x, y = np.meshgrid(lons.points, lats.points)
    lats = iris.coords.AuxCoord(y, **lats._as_defn()._asdict())
    lons = iris.coords.AuxCoord(x, **lons._as_defn()._asdict())
    self.cube.remove_coord('longitude')
    self.cube.remove_coord('latitude')
    self.cube.remove_coord('Pressure Slice')
    self.cube.add_aux_coord(lons, (1, 2))
    self.cube.add_aux_coord(lats, (1, 2))

    result = regrid(self.cube, grid, 'unstructured_nearest')
    expected = np.array([[[3]], [[7]], [[11]]])
    self.assertArrayAlmostEqual(result.data, expected)
def test_regrid__cell_specification(self):
    specs = ['1x1', '2x2', '3x3', '4x4', '5x5']
    scheme = 'linear'
    for spec in specs:
        result = regrid(self.src_cube, spec, scheme)
        self.assertEqual(result, self.regridded_cube)
        self._check(spec, scheme, spec=True)
    self.assertEqual(set(_CACHE.keys()), set(specs))
def land_swe_top(run):
    """
    Compute the median absolute difference of SWE against GlobSnow.

    Arguments:
        run - dictionary containing model run metadata
              (see auto_assess/model_run.py for description)

    Returns:
        metrics - dictionary of metrics names and values
    """
    supermean_data_dir = os.path.join(run['data_root'], run['runid'],
                                      run['_area'] + '_supermeans')

    snow_seasons = ['son', 'djf', 'mam']

    # Calculate median absolute errors for the seasons with snow.
    metrics = dict()
    for season in snow_seasons:
        clim_file = os.path.join(run['climfiles_root'],
                                 'SWE_clm_{}.pp'.format(season))
        swe_clim = iris.load_cube(clim_file)
        swe_clim.data = np.ma.masked_array(swe_clim.data,
                                           mask=(swe_clim.data == -1e20))

        # Snow amount from the model run.
        swe_run = get_supermean('surface_snow_amount', season,
                                supermean_data_dir)

        # Force the same coord_system.
        swe_run.coord('longitude').coord_system = swe_clim.coord(
            'longitude').coord_system
        swe_run.coord('latitude').coord_system = swe_clim.coord(
            'latitude').coord_system

        # Force the units for SWE to match the model.
        swe_clim.units = swe_run.units

        # Form the difference (the regridding happens here).
        swe_run = regrid(swe_run, swe_clim, 'linear')
        dff = swe_run - swe_clim
        iris.save(
            dff,
            os.path.join(run['dump_output'],
                         'snow_diff_{}.nc'.format(season)))

        # Calculate the median absolute error of the difference.
        name = "snow MedAbsErr {}".format(season)
        metrics[name] = float(np.ma.median(np.ma.abs(dff.data)))

    return metrics
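# The metric computed above is the median absolute error: the median of
# |model - climatology| over all unmasked grid points. A tiny worked
# example:
import numpy as np

diff = np.ma.masked_invalid([1.0, -2.0, np.nan, 4.0])
print(float(np.ma.median(np.ma.abs(diff))))  # -> 2.0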
def compute(self):
    """Compute the Eady growth rate and either its annual or seasonal mean."""
    data = group_metadata(self.cfg['input_data'].values(), 'alias')
    for alias in data:
        var = group_metadata(data[alias], 'short_name')
        temperature = iris.load_cube(var['ta'][0]['filename'])
        plev = temperature.coord('air_pressure')
        theta = self.potential_temperature(temperature, plev)
        del temperature

        geopotential = iris.load_cube(var['zg'][0]['filename'])
        brunt = self.brunt_vaisala_frq(theta, geopotential)

        lats = geopotential.coord('latitude')
        fcor = self.coriolis(lats, geopotential.shape)

        eastward_wind = iris.load_cube(var['ua'][0]['filename'])
        if eastward_wind.shape != geopotential.shape:
            eastward_wind = regrid(eastward_wind, geopotential,
                                   scheme='linear')

        egr = self.eady_growth_rate(fcor, eastward_wind, geopotential, brunt)
        cube_egr = eastward_wind.copy(egr * 86400)  # s-1 -> day-1
        cube_egr.standard_name = None
        cube_egr.long_name = 'eady_growth_rate'
        cube_egr.var_name = 'egr'
        cube_egr.units = 'day-1'

        if self.time_statistic == 'annual_mean':
            cube_egr = annual_statistics(cube_egr)
            cube_egr = cube_egr.collapsed('time', iris.analysis.MEAN)
        elif self.time_statistic == 'seasonal_mean':
            cube_egr = seasonal_statistics(cube_egr)
            cube_egr = cube_egr.collapsed('time', iris.analysis.MEAN)
            self.seasonal_plots(cube_egr, alias)
        else:
            logger.error(
                "Parameter time_statistic is not well set in the recipe. "
                "Must be 'annual_mean' or 'seasonal_mean'.")
            sys.exit()

        self.save(cube_egr, alias, data)
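# `self.eady_growth_rate` is implemented elsewhere in this diagnostic. The
# quantity it returns is the maximum Eady growth rate, commonly written as
# sigma = 0.3098 * |f| * |du/dz| / N, with f the Coriolis parameter, du/dz
# the vertical shear of the eastward wind, and N the Brunt-Vaisala
# frequency. A minimal numpy sketch, assuming all inputs are arrays on the
# same (level, lat, lon) grid:
import numpy as np

def eady_growth_rate_sketch(fcor, ua, zg, brunt):
    """Illustrative only: maximum Eady growth rate in s-1."""
    du_dz = np.gradient(ua, axis=0) / np.gradient(zg, axis=0)
    return 0.3098 * np.abs(fcor) * np.abs(du_dz) / brunt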
def regrid_temperature(src_temp, src_height, target_height):
    """Convert temperature to the target grid with a lapse-rate correction."""
    # Convert the 2m temperature to sea-level temperature (slt).
    src_dtemp = lapse_rate_correction(src_height)
    src_slt = src_temp.copy(data=src_temp.core_data() + src_dtemp.core_data())

    # Interpolate the sea-level temperature to the target grid.
    target_slt = regrid(src_slt, target_grid=target_height, scheme='linear')

    # Convert the sea-level temperature back to the target elevation.
    target_dtemp = lapse_rate_correction(target_height)
    target_temp = target_slt
    target_temp.data = target_slt.core_data() - target_dtemp.core_data()
    return target_temp
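# `lapse_rate_correction` is defined elsewhere in this diagnostic; a
# minimal sketch under the common assumption of a constant environmental
# lapse rate of 6.5 K per km (the names below are illustrative):
STANDARD_LAPSE_RATE = 0.0065  # K m-1

def lapse_rate_correction_sketch(height):
    """Illustrative: temperature offset implied by the surface elevation."""
    dtemp = height.copy(data=height.core_data() * STANDARD_LAPSE_RATE)
    dtemp.units = 'K'
    return dtemp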
def test_regrid__cell_specification(self):
    # Clear the cache before and after the test to avoid poisoning
    # it with mocked cubes, see
    # https://github.com/ESMValGroup/ESMValCore/issues/953
    _CACHE.clear()

    specs = ['1x1', '2x2', '3x3', '4x4', '5x5']
    scheme = 'linear'
    for spec in specs:
        result = regrid(self.src_cube, spec, scheme)
        self.assertEqual(result, self.regridded_cube)
        self._check(spec, scheme, spec=True)
    self.assertEqual(set(_CACHE.keys()), set(specs))

    _CACHE.clear()
def run_regrid(
    cube,
    settings: typing.Dict,
    **kwargs,
):
    target_grid = settings["target_grid"]
    scheme = settings["scheme"]
    lat_offset = settings.get("lat_offset", True)
    lon_offset = settings.get("lon_offset", True)

    return regrid(cube=cube,
                  target_grid=target_grid,
                  scheme=scheme,
                  lat_offset=lat_offset,
                  lon_offset=lon_offset)
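# Example use of run_regrid, with an illustrative settings dict in the
# style of an ESMValCore preprocessor entry:
settings = {
    'target_grid': '2.5x2.5',
    'scheme': 'linear',
    'lat_offset': True,
    'lon_offset': True,
}
# regridded = run_regrid(cube, settings)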
def test_regrid__unstructured_nearest(self):
    data = np.empty((1, 1))
    lons = iris.coords.DimCoord([1.6],
                                standard_name='longitude',
                                bounds=[[1, 2]],
                                units='degrees_east',
                                coord_system=self.cs)
    lats = iris.coords.DimCoord([1.6],
                                standard_name='latitude',
                                bounds=[[1, 2]],
                                units='degrees_north',
                                coord_system=self.cs)
    coords_spec = [(lats, 0), (lons, 1)]
    grid = iris.cube.Cube(data, dim_coords_and_dims=coords_spec)

    # Replace the 1d spatial coords with 2d spatial coords.
    lons = self.cube.coord('longitude')
    lats = self.cube.coord('latitude')
    x, y = np.meshgrid(lons.points, lats.points)
    lats = iris.coords.AuxCoord(
        y,
        standard_name=lats.metadata.standard_name,
        long_name=lats.metadata.long_name,
        var_name=lats.metadata.var_name,
        units=lats.metadata.units,
        attributes=lats.metadata.attributes,
        coord_system=lats.metadata.coord_system,
        climatological=lats.metadata.climatological)
    lons = iris.coords.AuxCoord(
        x,
        standard_name=lons.metadata.standard_name,
        long_name=lons.metadata.long_name,
        var_name=lons.metadata.var_name,
        units=lons.metadata.units,
        attributes=lons.metadata.attributes,
        coord_system=lons.metadata.coord_system,
        climatological=lons.metadata.climatological)
    self.cube.remove_coord('longitude')
    self.cube.remove_coord('latitude')
    self.cube.remove_coord('Pressure Slice')
    self.cube.add_aux_coord(lons, (1, 2))
    self.cube.add_aux_coord(lats, (1, 2))

    result = regrid(self.cube, grid, 'unstructured_nearest')
    expected = np.array([[[3]], [[7]], [[11]]])
    np.testing.assert_array_almost_equal(result.data, expected, decimal=6)
def test_regrid__area_weighted(self):
    data = np.empty((1, 1))
    lons = iris.coords.DimCoord([1.6],
                                standard_name='longitude',
                                bounds=[[1, 2]],
                                units='degrees_east',
                                coord_system=self.cs)
    lats = iris.coords.DimCoord([1.6],
                                standard_name='latitude',
                                bounds=[[1, 2]],
                                units='degrees_north',
                                coord_system=self.cs)
    coords_spec = [(lats, 0), (lons, 1)]
    grid = iris.cube.Cube(data, dim_coords_and_dims=coords_spec)
    result = regrid(self.cube, grid, 'area_weighted')
    expected = np.array([1.499886, 5.499886, 9.499886])
    np.testing.assert_array_almost_equal(result.data, expected, decimal=6)
def test_regrid__nearest(self):
    data = np.empty((1, 1))
    lons = iris.coords.DimCoord([1.6],
                                standard_name='longitude',
                                bounds=[[1, 2]],
                                units='degrees_east',
                                coord_system=self.cs)
    lats = iris.coords.DimCoord([1.6],
                                standard_name='latitude',
                                bounds=[[1, 2]],
                                units='degrees_north',
                                coord_system=self.cs)
    coords_spec = [(lats, 0), (lons, 1)]
    grid = iris.cube.Cube(data, dim_coords_and_dims=coords_spec)
    result = regrid(self.cube, grid, 'nearest')
    expected = np.array([[[3]], [[7]], [[11]]])
    self.assertArrayEqual(result.data, expected)
def test_regrid__linear_extrapolate(self):
    data = np.empty((3, 3))
    lons = iris.coords.DimCoord([0, 1.5, 3],
                                standard_name='longitude',
                                bounds=[[0, 1], [1, 2], [2, 3]],
                                units='degrees_east',
                                coord_system=self.cs)
    lats = iris.coords.DimCoord([0, 1.5, 3],
                                standard_name='latitude',
                                bounds=[[0, 1], [1, 2], [2, 3]],
                                units='degrees_north',
                                coord_system=self.cs)
    coords_spec = [(lats, 0), (lons, 1)]
    grid = iris.cube.Cube(data, dim_coords_and_dims=coords_spec)
    result = regrid(self.cube, grid, 'linear_extrapolate')
    expected = [[[-3., -1.5, 0.], [0., 1.5, 3.], [3., 4.5, 6.]],
                [[1., 2.5, 4.], [4., 5.5, 7.], [7., 8.5, 10.]],
                [[5., 6.5, 8.], [8., 9.5, 11.], [11., 12.5, 14.]]]
    self.assertArrayEqual(result.data, expected)
def test_regrid__nearest_extrapolate_with_mask(self):
    data = np.empty((3, 3))
    lons = iris.coords.DimCoord([0, 1.6, 3],
                                standard_name='longitude',
                                bounds=[[0, 1], [1, 2], [2, 3]],
                                units='degrees_east',
                                coord_system=self.cs)
    lats = iris.coords.DimCoord([0, 1.6, 3],
                                standard_name='latitude',
                                bounds=[[0, 1], [1, 2], [2, 3]],
                                units='degrees_north',
                                coord_system=self.cs)
    coords_spec = [(lats, 0), (lons, 1)]
    grid = iris.cube.Cube(data, dim_coords_and_dims=coords_spec)
    result = regrid(self.cube, grid, 'nearest')
    expected = ma.empty((3, 3, 3))
    expected.mask = ma.masked
    expected[:, 1, 1] = np.array([3, 7, 11])
    self.assertArrayEqual(result.data, expected)
def test_regrid__horizontal_schemes(self):
    for scheme in self.regrid_schemes:
        result = regrid(self.src_cube, self.tgt_grid, scheme)
        self.assertEqual(result, self.regridded_cube)
        self._check(self.tgt_grid, scheme)
def test_invalid_scheme__unknown(self):
    dummy = mock.sentinel.dummy
    emsg = 'Unknown regridding scheme'
    with self.assertRaisesRegex(ValueError, emsg):
        regrid(dummy, dummy, 'wibble')
def test_invalid_tgt_grid__unknown(self):
    dummy = mock.sentinel.dummy
    scheme = 'linear'
    emsg = 'Expecting a cube'
    with self.assertRaisesRegex(ValueError, emsg):
        regrid(self.src_cube, dummy, scheme)
def main(cfg):
    # The config object is a dict of all the metadata from the preprocessor.

    # Get the variable that was processed.
    var = list(extract_variables(cfg).keys())
    assert len(var) == 1
    var = var[0]

    # Use relative changes for precipitation.
    if var == "pr":
        rel_change = True
    else:
        rel_change = False

    # First group the datasets by project. This creates a dict of datasets
    # keyed by project (CMIP5, CMIP6 etc.).
    projects = group_metadata(cfg["input_data"].values(), "project")
    # How to uniquely define a dataset varies by project: for CMIP it is
    # simply the dataset; for CORDEX it is the combination of dataset and
    # driver (and possibly also domain if we start adding those). It also
    # gets more complex if we start adding in different ensembles.

    # This section of the code loads and organises the data to be ready
    # for plotting.
    logger.info("Loading data")
    # Empty dicts to store the results.
    projections = {}
    model_lists = {}
    cordex_drivers = []
    # Loop over the projects.
    for proj in projects:
        # We now have a list of all the data entries. For CMIPs we can just
        # group the metadata again by dataset and work with that.
        models = group_metadata(projects[proj], "dataset")

        # Rename non-standard project labels.
        if proj == 'non-cordex-rcm':
            proj = 'CORDEX'
        if proj == 'non-cmip5-gcm':
            proj = 'CMIP5'

        # Empty dict for the results.
        if proj not in projections.keys():
            projections[proj] = {}
        proj_key = proj

        # Loop over the models.
        for m in models:
            if proj == "CORDEX":
                # Go one level deeper in the dictionary to deal with the
                # driving models.
                drivers = group_metadata(models[m], "driver")
                projections[proj][m] = dict.fromkeys(drivers.keys())
                for d in drivers:
                    logging.info(f"Calculating anomalies for {proj} {m} {d}")
                    anoms = get_anomalies(drivers[d], rel_change)
                    if anoms is None:
                        continue
                    projections[proj][m][d] = anoms
                    if proj not in model_lists:
                        model_lists[proj] = []
                    model_lists[proj].append(f"{m} {d}")
                    # Fix shorthand driver names.
                    if d == 'HadGEM':
                        d = 'MOHC-HadGEM2-ES'
                    elif d == 'MPI':
                        d = 'MPI-M-MPI-ESM-LR'
                    if proj == "CORDEX":
                        cordex_drivers.append(d)
            elif proj == "UKCP18":
                # Go deeper to deal with ensembles and datasets;
                # split UKCP into separate GCM and RCM.
                proj_key = f"UKCP18 {m}"
                ensembles = group_metadata(models[m], "ensemble")
                projections[proj_key] = dict.fromkeys(ensembles.keys())
                for ens in ensembles:
                    logging.info(f"Calculating anomalies for {proj_key} {ens}")
                    anoms = get_anomalies(ensembles[ens], rel_change)
                    if anoms is None:
                        continue
                    projections[proj_key][ens] = anoms
                    if proj_key not in model_lists:
                        model_lists[proj_key] = []
                    model_lists[proj_key].append(f"{proj_key} {ens}")
            elif "cordex-cpm" in proj:
                # In this case we need to split by domain, as the same model
                # spec is used in multiple domains in some cases.
                domains = group_metadata(models[m], "domain")
                proj_key = "cordex-cpm"
                projections[proj_key][m] = dict.fromkeys(domains.keys())
                for dom in domains:
                    logging.info(
                        f"Calculating anomalies for {proj_key} {dom} {m}")
                    anoms = get_anomalies(domains[dom], rel_change)
                    projections[proj_key][m][dom] = anoms
                    if proj_key not in model_lists:
                        model_lists[proj_key] = []
                    model_lists[proj_key].append(f"{dom} {m}")
            else:
                logging.info(f"Calculating anomalies for {proj} {m}")
                anoms = get_anomalies(models[m], rel_change)
                if anoms is None:
                    continue
                projections[proj][m] = anoms
                if proj not in model_lists:
                    model_lists[proj] = []
                model_lists[proj].append(f"{m}")
        # Remove any empty categories (i.e. UKCP18, which has been split
        # into RCM and GCM).
        if projections[proj] == {}:
            del projections[proj]

    cordex_drivers = set(cordex_drivers)

    # Create two extra subsets containing the CORDEX drivers and the CPM
    # drivers.
    projections['CORDEX_drivers'] = {}
    cmip5_driving_models = []
    for m in cordex_drivers:
        cmip5_driving_models.append(remove_institute_from_driver(m))

    for m in projections['CMIP5']:
        if m in cmip5_driving_models:
            projections['CORDEX_drivers'][m] = projections['CMIP5'][m]

    projections['CPM_drivers'] = {}
    for rcm in projections['CORDEX']:
        for d in projections['CORDEX'][rcm]:
            if f'{rcm} {d}' in list(CPM_DRIVERS.values()):
                projections['CPM_drivers'][f'{rcm} {d}'] = projections[
                    'CORDEX'][rcm][d]

    # Compute the multi-model means.
    for p in projections:
        mm_mean = compute_multi_model_stats(
            list(NestedDictValues(projections[p])), iris.analysis.MEAN)
        projections[p]['mean'] = mm_mean

    # Compute regridded versions for CORDEX and the CPMs.
    for p in projections:
        grid = None
        if p == 'CORDEX':
            grid = projections['CORDEX_drivers']['mean']
            scheme = 'area_weighted'
        elif p == 'cordex-cpm':
            grid = projections['CPM_drivers']['mean']
            scheme = 'area_weighted'

        if grid:
            src = projections[p]['mean']
            regrid_mean = regrid(src, grid, scheme)
            projections[p]['mean_rg'] = regrid_mean

    # Compute the differences on the common grids.
    for p in projections:
        if p == 'CORDEX':
            diff = projections[p]['mean_rg'] - projections['CORDEX_drivers'][
                'mean']
            projections[p]['diff_rg'] = diff
        elif p == 'cordex-cpm':
            diff = projections[p]['mean_rg'] - projections['CPM_drivers'][
                'mean']
            projections[p]['diff_rg'] = diff

    # This section of the code does the plotting. We now have all the
    # projections in the projections dictionary; we process that dictionary
    # and move the data into a list of vectors. The projections object is
    # the key one that contains all our data.
    seasons = {0: "DJF", 1: "MAM", 2: "JJA", 3: "SON"}

    logger.info("Plotting")
    extent = (
        cfg["domain"]["start_longitude"] - 2,
        cfg["domain"]["end_longitude"] + 2,
        cfg["domain"]["start_latitude"] - 2,
        cfg["domain"]["end_latitude"] + 2,
    )
    for s in seasons.keys():
        # Make the output directory.
        try:
            os.mkdir(f"{cfg['plot_dir']}/{seasons[s]}")
        except FileExistsError:
            pass

        for p in projections:
            pdata = process_projections_dict(projections[p], s)

            for m in pdata:
                # Don't plot the driving model data twice.
                if '_drivers' in p:
                    if m != 'mean':
                        continue
                title = f"{p} {m} {seasons[s]} {var} change"
                plt.figure(figsize=(12.8, 9.6))
                ax = plt.axes(projection=ccrs.PlateCarree())
                plot_map(pdata[m], extent, var, ax, True)
                plt.title(title)
                logging.info(f'Saving plot for {p} {m} {s}')
                plt.savefig(
                    f"{cfg['plot_dir']}/{seasons[s]}/{p}_{m}_map_{seasons[s]}.png"
                )
                plt.close()

                # Save the calculated anomaly data, in case we want to work
                # with it later.
                try:
                    os.mkdir(f"{cfg['work_dir']}/{seasons[s]}")
                except FileExistsError:
                    pass
                iris.save(
                    pdata[m],
                    f"{cfg['work_dir']}/{seasons[s]}/{p}_{m}_anom_{seasons[s]}.nc"
                )

        # Now make the panel plots for the mean data, but only if we have
        # CPM data. The plots should include: all CMIP5, CORDEX drivers,
        # CORDEX, CPM drivers, and CPM.
        if 'cordex-cpm' in projections:
            scon = iris.Constraint(season_number=s)
            logging.info(f'Making {seasons[s]} panel plot')
            plt.figure(figsize=(12.8, 9.6))
            ax = plt.subplot(331, projection=ccrs.PlateCarree())
            cmesh = plot_map(projections['CMIP5']['mean'].extract(scon),
                             extent, var, ax)
            plt.title('CMIP5')

            ax = plt.subplot(334, projection=ccrs.PlateCarree())
            plot_map(projections['CORDEX_drivers']['mean'].extract(scon),
                     extent, var, ax)
            plt.title('CORDEX driving models')

            ax = plt.subplot(335, projection=ccrs.PlateCarree())
            plot_map(projections['CORDEX']['mean'].extract(scon), extent,
                     var, ax)
            plt.title('CORDEX')

            # Plot the difference of CORDEX to CMIP5.
            ax = plt.subplot(336, projection=ccrs.PlateCarree())
            cmesh_diff = plot_map(
                projections['CORDEX']['diff_rg'].extract(scon), extent,
                f'{var}_diff', ax)
            plt.title('CORDEX - CMIP5 diff')

            ax = plt.subplot(337, projection=ccrs.PlateCarree())
            plot_map(projections['CPM_drivers']['mean'].extract(scon),
                     extent, var, ax)
            plt.title('CPM driving models')

            ax = plt.subplot(338, projection=ccrs.PlateCarree())
            plot_map(projections['cordex-cpm']['mean'].extract(scon), extent,
                     var, ax)
            plt.title('CPM')

            # Plot the difference of CPM to CORDEX.
            ax = plt.subplot(339, projection=ccrs.PlateCarree())
            plot_map(projections['cordex-cpm']['diff_rg'].extract(scon),
                     extent, f'{var}_diff', ax)
            plt.title('CPM - CORDEX diff')

            # Add the legends.
            ax = plt.subplot(332)
            ax.axis("off")
            plt.colorbar(cmesh, orientation="horizontal")
            ax = plt.subplot(333)
            ax.axis("off")
            plt.colorbar(cmesh_diff, orientation="horizontal")

            plt.suptitle(f'{seasons[s]} {var} change')
            plt.savefig(
                f"{cfg['plot_dir']}/{seasons[s]}/all_means_map_{seasons[s]}.png"
            )

    # Print all the datasets used.
    print("Input models for plots:")
    for p in model_lists.keys():
        print(f"{p}: {len(model_lists[p])} models")
        print(model_lists[p])
        print("")
def land_surf_rad(run):
    """
    Compute median absolute errors against CERES-EBAF data.

    Arguments:
        run - dictionary containing model run metadata
              (see auto_assess/model_run.py for description)

    Returns:
        metrics - dictionary of metrics names and values.
    """
    supermean_data_dir = os.path.join(run['data_root'], run['runid'],
                                      run['_area'] + '_supermeans')

    rad_seasons = ['ann', 'djf', 'mam', 'jja', 'son']
    rad_fld = ['SurfRadNSW', 'SurfRadNLW']

    # Land mask: use the fractional mask for now.
    # Fraction of Land m01s03i395
    # replaced with a constant sftlf mask; the original was
    # lnd = get_supermean('land_area_fraction', 'ann', supermean_data_dir)
    cubes = iris.load(os.path.join(supermean_data_dir, 'cubeList.nc'))
    lnd = cubes.extract_cube(iris.Constraint(name='land_area_fraction'))

    metrics = dict()
    for season in rad_seasons:
        for fld in rad_fld:
            if fld == 'SurfRadNSW':
                ebaf_fld = get_supermean(
                    'Surface Net downward Shortwave Radiation', season,
                    run['clim_root'], obs_flag='CERES-EBAF')
                run_fld_rad = get_supermean(
                    'Surface Net downward Shortwave Radiation', season,
                    supermean_data_dir)
            elif fld == 'SurfRadNLW':
                ebaf_fld = get_supermean(
                    'Surface Net downward Longwave Radiation', season,
                    run['clim_root'], obs_flag='CERES-EBAF')
                run_fld_rad = get_supermean(
                    'Surface Net downward Longwave Radiation', season,
                    supermean_data_dir)
            else:
                raise Exception('Skipping unassigned case.')

            # Regrid both fields onto the land-fraction grid and mask out
            # points based on a land-fraction threshold. Force the model's
            # coordinate system onto the other cubes first.
            ebaf_fld.coord('latitude').coord_system = \
                run_fld_rad.coord('latitude').coord_system
            ebaf_fld.coord('longitude').coord_system = \
                run_fld_rad.coord('longitude').coord_system

            lnd.coord('latitude').coord_system = \
                run_fld_rad.coord('latitude').coord_system
            lnd.coord('longitude').coord_system = \
                run_fld_rad.coord('longitude').coord_system

            reg_run_fld = regrid(run_fld_rad, lnd, 'linear')
            reg_ebaf_fld = regrid(ebaf_fld, lnd, 'linear')

            # Apply the mask.
            reg_run_fld.data = np.ma.masked_array(reg_run_fld.data,
                                                  mask=(lnd.data > 90.))
            reg_ebaf_fld.data = np.ma.masked_array(reg_ebaf_fld.data,
                                                   mask=(lnd.data > 90.))

            # Form a simple difference.
            dff = reg_run_fld - reg_ebaf_fld

            name = "{} MedAbsErr {}".format(fld, season)
            metrics[name] = float(np.ma.median(np.abs(dff.data)))

    return metrics
def land_sm_top(run):
    """
    Calculate median absolute errors for soil moisture against CCI data.

    Arguments:
        run - dictionary containing model run metadata
              (see auto_assess/model_run.py for description)

    Returns:
        metrics - dictionary of metrics names and values
    """
    supermean_data_dir = os.path.join(run['data_root'], run['runid'],
                                      run['_area'] + '_supermeans')

    seasons = ['djf', 'mam', 'jja', 'son']

    # Constants: densities of water and ice.
    rhow = 1000.
    rhoi = 917.
    # Depth of the first soil layer.
    dz1 = 0.1

    # Work through each season.
    metrics = dict()
    for season in seasons:
        fname = 'ecv_soil_moisture_{}.nc'.format(season)
        clim_file = os.path.join(run['climfiles_root'], fname)
        ecv_clim = iris.load_cube(clim_file)
        # Correct invalid units.
        if (ecv_clim.units == 'unknown'
                and 'invalid_units' in ecv_clim.attributes):
            if ecv_clim.attributes['invalid_units'] == 'm^3m^-3':
                ecv_clim.units = 'm3 m-3'

        # m01s08i223
        # standard_name: mrsos
        smcl_run = get_supermean('moisture_content_of_soil_layer', season,
                                 supermean_data_dir)

        # m01s08i229
        # standard_name: ???
        # TODO: uncomment when implemented
        # sthu_run = get_supermean(
        #     'mass_fraction_of_unfrozen_water_in_soil_moisture', season,
        #     supermean_data_dir)

        # m01s08i230
        # standard_name: ??? soil_frozen_water_content - mrfso
        # TODO: uncomment when implemented
        # sthf_run = get_supermean(
        #     'mass_fraction_of_frozen_water_in_soil_moisture', season,
        #     supermean_data_dir)

        # TODO: remove after correct implementation
        sthu_run = smcl_run
        sthf_run = smcl_run

        # Extract the top soil layer.
        cubes = [smcl_run, sthu_run, sthf_run]
        for i, cube in enumerate(cubes):
            if cube.coord('depth').attributes['positive'] != 'down':
                logger.warning('Cube %s depth attribute is not down', cube)
            top_level = min(cube.coord('depth').points)
            topsoil = iris.Constraint(depth=top_level)
            cubes[i] = cube.extract(topsoil)
        smcl_run, sthu_run, sthf_run = cubes

        # Set all sea points (negative values) to missing data (np.nan).
        smcl_run.data[smcl_run.data < 0] = np.nan
        sthu_run.data[sthu_run.data < 0] = np.nan
        sthf_run.data[sthf_run.data < 0] = np.nan

        # Set soil moisture to missing data on ice points (i.e. no soil).
        sthu_plus_sthf = (dz1 * rhow * sthu_run) + (dz1 * rhoi * sthf_run)
        ice_pts = sthu_plus_sthf.data == 0
        sthu_plus_sthf.data[ice_pts] = np.nan

        # Calculate the volumetric soil moisture in m3/m3.
        theta_s_run = smcl_run / sthu_plus_sthf
        vol_sm1_run = theta_s_run * sthu_run
        vol_sm1_run.units = "m3 m-3"
        vol_sm1_run.long_name = "Top layer Soil Moisture"

        # Update the coordinate systems with a WGS84 coord system.
        # TODO: ask Heather why this is needed
        # TODO: who is Heather?
        # Unify the coord systems for the regridder.
        wgs84 = iris.coord_systems.GeogCS(semi_major_axis=6378137.0,
                                          inverse_flattening=298.257223563)
        vol_sm1_run.coord('longitude').coord_system = wgs84
        vol_sm1_run.coord('latitude').coord_system = wgs84
        ecv_clim.coord('longitude').coord_system = wgs84
        ecv_clim.coord('latitude').coord_system = wgs84

        # Interpolate to the grid of the climatology and form the difference.
        vol_sm1_run = regrid(vol_sm1_run, ecv_clim, 'linear')

        # Diff the cubes.
        dff = vol_sm1_run - ecv_clim

        # Remove NaNs from the data before aggregating statistics.
        dff.data = np.ma.masked_invalid(dff.data)

        # Save the output.
        iris.save(
            dff,
            os.path.join(run['dump_output'],
                         'soilmoist_diff_{}.nc'.format(season)))

        name = 'soilmoisture MedAbsErr {}'.format(season)
        metrics[name] = float(np.ma.median(np.ma.abs(dff.data)))

    return metrics