def create_model_file(self):
    """Build the Base object that will later read/write this model's netCDF file.

    Fills in the filename-template keys (variable, model version, period,
    extension, case id, realization) and then applies any user-supplied
    custom keys from the parameter object.
    """
    model_file = Base(self.data_path, self.parameter.filename_template)
    model_file.variable = self.var
    model_file.model_version = self.obs_or_model
    model_file.period = self.parameter.period
    model_file.ext = "nc"
    model_file.case_id = self.parameter.case_id
    model_file.realization = self.parameter.realization
    self._model_file = model_file
    self.apply_custom_keys(self._model_file, self.parameter.custom_keys, self.var)
def __init__(self, parameter, var_name_long, obs_dict, sftlf):
    """Store the driver inputs, create the output-file object, then run the
    setup steps in dependency order.

    Parameters
    ----------
    parameter : driver parameter object (paths, options, custom keys)
    var_name_long : variable id, possibly with a level suffix, e.g. 'ta_850'
    obs_dict : mapping of variable -> observation metadata
    sftlf : land/sea-mask lookup keyed by model name
    """
    logging.getLogger("pcmdi_metrics").setLevel(LOG_LEVEL)
    self.parameter = parameter
    self.var_name_long = var_name_long
    self.obs_dict = obs_dict
    # The bare variable id is everything before the first '_'.
    self.var = var_name_long.split('_')[0]
    self.sftlf = sftlf
    self.metrics_def_dictionary = {}
    self.metrics_dictionary = {}
    self.out_file = Base(self.parameter.metrics_output_path,
                         self.parameter.output_json_template)
    # Regrid/realm attributes start blank; setup_regrid_and_realm_vars()
    # fills them in before setup_out_file() reads them.
    for attr_name in ('regrid_method', 'regrid_tool', 'table_realm', 'realm'):
        setattr(self, attr_name, '')
    self.setup_regrid_and_realm_vars()
    self.setup_out_file()
    self.setup_metrics_dictionary()
def __init__(self, parameter, var_name_long, obs_dict, sftlf):
    """Store the driver inputs, create the output-file object from a fixed
    filename template, then run the setup steps in dependency order.

    Parameters
    ----------
    parameter : driver parameter object (paths, options, custom keys)
    var_name_long : variable id, possibly with a level suffix, e.g. 'ta_850'
    obs_dict : mapping of variable -> observation metadata
    sftlf : land/sea-mask lookup keyed by model name
    """
    # Use the package's named logger like the sibling definitions in this
    # file do; the previous logging.basicConfig(level=logging.DEBUG) call
    # reconfigured the *root* logger as a side effect of constructing this
    # object, affecting logging for the whole process.
    logging.getLogger("pcmdi_metrics").setLevel(LOG_LEVEL)
    self.parameter = parameter
    self.var_name_long = var_name_long
    self.obs_dict = obs_dict
    # The bare variable id is everything before the first '_'.
    self.var = var_name_long.split('_')[0]
    self.sftlf = sftlf
    self.metrics_def_dictionary = {}
    self.metrics_dictionary = {}
    # Hard-coded output name template (keys are filled in by Base).
    string_template = "%(variable)%(level)_%(target_grid_name)_" +\
        "%(regrid_tool)_%(regrid_method)_metrics"
    self.out_file = Base(self.parameter.metrics_output_path, string_template)
    self.regrid_method = ''
    self.regrid_tool = ''
    self.table_realm = ''
    self.realm = ''
    # Order matters: the regrid/realm attributes are read by setup_out_file().
    self.setup_regrid_and_realm_vars()
    self.setup_out_file()
    self.setup_metrics_dictionary()
def output_interpolated_model_climatologies(self, test, test_data):
    '''Write the interpolated test climatology out as a netCDF file.'''
    region_name = self.get_region_name_from_region(test.region)
    out_dir = os.path.join(self.parameter.test_clims_interpolated_output,
                           region_name)
    clim_file = Base(out_dir, self.parameter.filename_output_template)
    logging.getLogger("pcmdi_metrics").info(
        'Saving interpolated climatologies to: %s' % clim_file())
    # Fill in the filename-template keys before writing.
    for attr_name, attr_value in (('level', self.out_file.level),
                                  ('model_version', test.obs_or_model),
                                  ('table', self.table_realm),
                                  ('period', self.parameter.period),
                                  ('case_id', self.parameter.case_id)):
        setattr(clim_file, attr_name, attr_value)
    clim_file.set_target_grid(self.parameter.target_grid,
                              self.regrid_tool,
                              self.regrid_method)
    for attr_name, attr_value in (('variable', self.var),
                                  ('region', region_name),
                                  ('realization', self.parameter.realization)):
        setattr(clim_file, attr_name, attr_value)
    DataSet.apply_custom_keys(clim_file, self.parameter.custom_keys, self.var)
    clim_file.write(test_data, type="nc", id=self.var)
class OutputMetrics(object):
    """Accumulates computed metrics into nested dictionaries and writes the
    results as JSON (plus interpolated climatologies as netCDF)."""

    def __init__(self, parameter, var_name_long, obs_dict, sftlf):
        # parameter: driver parameter object (paths, options, custom keys).
        # var_name_long: variable id, possibly with level suffix ('ta_850').
        # obs_dict: mapping of variable -> observation metadata.
        # sftlf: land/sea-mask lookup keyed by model name.
        logging.getLogger("pcmdi_metrics").setLevel(LOG_LEVEL)
        self.parameter = parameter
        self.var_name_long = var_name_long
        self.obs_dict = obs_dict
        # The bare variable id is everything before the first '_'.
        self.var = var_name_long.split('_')[0]
        self.sftlf = sftlf
        self.metrics_def_dictionary = {}
        self.metrics_dictionary = {}
        self.out_file = Base(self.parameter.metrics_output_path,
                             self.parameter.output_json_template)
        self.regrid_method = ''
        self.regrid_tool = ''
        self.table_realm = ''
        self.realm = ''
        # Order matters: regrid/realm attributes are read by setup_out_file().
        self.setup_regrid_and_realm_vars()
        self.setup_out_file()
        self.setup_metrics_dictionary()

    def setup_metrics_dictionary(self):
        '''Initialize the results dict (metrics_dictionary) and the metrics
        documentation dict (metrics_def_dictionary) which is put in the
        results dict.'''
        self.metrics_def_dictionary = collections.OrderedDict()
        self.metrics_dictionary = collections.OrderedDict()
        self.metrics_dictionary["DISCLAIMER"] = self.open_disclaimer()
        if self.parameter.user_notes is not None:
            self.metrics_dictionary["USER_NOTES"] = self.parameter.user_notes
        self.metrics_dictionary["RESULTS"] = collections.OrderedDict()
        self.metrics_dictionary["Variable"] = {}
        self.metrics_dictionary["Variable"]["id"] = self.var
        self.metrics_dictionary["json_version"] = '3.0'
        self.metrics_dictionary["References"] = {}
        self.metrics_dictionary["RegionalMasking"] = {}
        # A level suffix in var_name_long (e.g. 'ta_850') becomes both a
        # "Variable"/"level" entry and a filename fragment on out_file.
        level = DataSet.calculate_level_from_var(self.var_name_long)
        if level is None:
            self.out_file.level = ''
        else:
            self.metrics_dictionary["Variable"]["level"] = level
            # Filename fragment: level divided by 100 (presumably Pa -> hPa
            # -- TODO confirm the units convention).
            self.out_file.level = "-%i" % (int(level / 100.0))

    def open_disclaimer(self):
        '''Return the contents of disclaimer.txt.'''
        f = DataSet.load_path_as_file_obj('disclaimer.txt')
        contents = f.read()
        f.close()
        return contents

    def setup_regrid_and_realm_vars(self):
        '''Set the regrid_method, regrid_tool, table_realm, and realm based
        off the obs dict and var.'''
        if DataSet.use_omon(self.obs_dict, self.var):
            # Ocean variables use the ocean-specific regrid settings.
            self.regrid_method = self.parameter.regrid_method_ocn
            self.regrid_tool = self.parameter.regrid_tool_ocn
            self.table_realm = 'Omon'
            self.realm = "ocn"
        else:
            self.regrid_method = self.parameter.regrid_method
            self.regrid_tool = self.parameter.regrid_tool
            self.table_realm = 'Amon'
            self.realm = "atm"

    def setup_out_file(self):
        '''Setup for the out_file, which outputs both the .json and .txt.'''
        self.out_file.set_target_grid(self.parameter.target_grid,
                                      self.regrid_tool,
                                      self.regrid_method)
        self.out_file.variable = self.var
        self.out_file.realm = self.realm
        self.out_file.table = self.table_realm
        self.out_file.case_id = self.parameter.case_id
        if hasattr(self, "obs_or_model"):
            self.out_file.model_version = self.obs_or_model
        # Let any template key present on the parameter object (or on self)
        # override the defaults set above; self takes precedence.
        for key in self.out_file.keys():
            if hasattr(self.parameter, key):
                setattr(self.out_file, key, getattr(self.parameter, key))
            if hasattr(self, key):
                setattr(self.out_file, key, getattr(self, key))
        DataSet.apply_custom_keys(self.out_file, self.parameter.custom_keys,
                                  self.var)

    def add_region(self, region):
        '''Add a region to the metrics_dictionary.'''
        self.metrics_dictionary['RegionalMasking'][
            self.get_region_name_from_region(region)] = region

    def calculate_and_output_metrics(self, ref, test):
        '''Given ref and test (both either of type Observation or Model),
        compute the metrics.'''
        # A string obs entry is an alias pointing at another obs entry.
        # NOTE(review): basestring is Python 2 only; under Python 3 this
        # needs str (or a compat shim) -- confirm the target interpreter.
        if isinstance(self.obs_dict[self.var][ref.obs_or_model], basestring):
            self.obs_var_ref = self.obs_dict[self.var][self.obs_dict[self.var][
                ref.obs_or_model]]
        else:
            self.obs_var_ref = self.obs_dict[self.var][ref.obs_or_model]
        self.metrics_dictionary['References'][
            ref.obs_or_model] = self.obs_var_ref
        try:
            ref_data = ref()
        except Exception as e:
            msg = 'Error while processing observation %s for variables %s:\n\t%s'
            logging.getLogger("pcmdi_metrics").error(
                msg % (ref.obs_or_model, self.var, str(e)))
        # NOTE(review): if ref() raised above, ref_data is unbound here and
        # this check raises NameError instead of the intended RuntimeError.
        if ref_data is None:
            # Something went bad!
            raise RuntimeError('Could not load reference {}'.format(
                ref.obs_or_model))
        try:
            test_data = test()
        except RuntimeError:
            # THIS EXCEPTION IS RAISED TO BREAK OUT OF THE FOR LOOP IN PCMDI_DRIVER
            # THIS SHOULD BE A CUSTOM EXCEPTION (PrematureBreakError)
            raise RuntimeError('Need to skip model: %s' % test.obs_or_model)
        # Todo: Make this a fcn
        self.set_grid_in_metrics_dictionary(test_data)
        if ref_data.shape != test_data.shape:
            raise RuntimeError(
                'Two data sets have different shapes. %s vs %s' %
                (ref_data.shape, test_data.shape))
        self.set_simulation_desc(test, test_data)
        # Ensure a per-(model, obs) results slot exists before filling it.
        if ref.obs_or_model not in self.metrics_dictionary['RESULTS'][
                test.obs_or_model]:
            self.metrics_dictionary["RESULTS"][test.obs_or_model][ref.obs_or_model] = \
                {'source': self.obs_dict[self.var][ref.obs_or_model]}
        parameter_realization = self.metrics_dictionary["RESULTS"][test.obs_or_model][ref.obs_or_model].\
            get(self.parameter.realization, {})
        if not self.parameter.dry_run:
            pr_rgn = pcmdi_metrics.pcmdi.compute_metrics(
                self.var_name_long, test_data, ref_data)
            # Calling compute_metrics with None for the model and obs returns
            # the definitions.
            self.metrics_def_dictionary.update(
                pcmdi_metrics.pcmdi.compute_metrics(self.var_name_long,
                                                    None, None))
            if hasattr(self.parameter, 'compute_custom_metrics'):
                pr_rgn.update(
                    self.parameter.compute_custom_metrics(
                        self.var_name_long, test_data, ref_data))
                # Best effort: fall back to the callable's docstring when the
                # custom-metrics definitions cannot be computed.
                try:
                    self.metrics_def_dictionary.update(
                        self.parameter.compute_custom_metrics(
                            self.var_name_long, None, None))
                except Exception:
                    self.metrics_def_dictionary.update({
                        'custom': self.parameter.compute_custom_metrics.__doc__
                    })
            # Store the per-region metrics sorted by key for stable output.
            parameter_realization[self.get_region_name_from_region(
                ref.region)] = collections.OrderedDict(
                    (k, pr_rgn[k]) for k in sorted(pr_rgn.keys()))
            self.metrics_dictionary['RESULTS'][test.obs_or_model][ref.obs_or_model][self.parameter.realization] = \
                parameter_realization
        if self.check_save_test_clim(ref):
            self.output_interpolated_model_climatologies(test, test_data)
        self.write_on_exit()

    def set_grid_in_metrics_dictionary(self, test_data):
        '''Set the grid in metrics_dictionary.'''
        grid = {}
        grid['RegridMethod'] = self.regrid_method
        grid['RegridTool'] = self.regrid_tool
        grid['GridName'] = self.parameter.target_grid
        # First axis is dropped -- presumably the time/season axis; the
        # remaining axes describe the spatial resolution. TODO confirm.
        grid['GridResolution'] = test_data.shape[1:]
        self.metrics_dictionary['GridInfo'] = grid

    def set_simulation_desc(self, test, test_data):
        '''Fillout information for the output .json and .txt files.'''
        self.metrics_dictionary["RESULTS"][test.obs_or_model] = \
            self.metrics_dictionary["RESULTS"].get(test.obs_or_model, {})
        # Only build the description once per model.
        if "SimulationDescription" not in \
                self.metrics_dictionary["RESULTS"][test.obs_or_model]:
            descr = {
                "MIPTable": self.obs_var_ref["CMIP_CMOR_TABLE"],
                "Model": test.obs_or_model,
            }
            # Each description field maps to either an attribute name or a
            # [format, attr, ...] list; parameter file wins over file attrs.
            sim_descr_mapping = {
                "ModelActivity": "project_id",
                "ModellingGroup": "institute_id",
                "Experiment": "experiment",
                "ModelFreeSpace": "ModelFreeSpace",
                "Realization": "realization",
                "creation_date": "creation_date",
            }
            sim_descr_mapping.update(
                getattr(self.parameter, "simulation_description_mapping", {}))
            for att in list(sim_descr_mapping.keys()):
                nm = sim_descr_mapping[att]
                if not isinstance(nm, (list, tuple)):
                    nm = ["%s", nm]
                fmt = nm[0]
                vals = []
                for a in nm[1:]:
                    # First trying from parameter file
                    if hasattr(self.parameter, a):
                        vals.append(getattr(self.parameter, a))
                    # Now fall back on file...
                    else:
                        f = cdms2.open(test.file_path())
                        if hasattr(f, a):
                            # Prefer a numeric value when it parses as float.
                            try:
                                vals.append(float(getattr(f, a)))
                            except Exception:
                                vals.append(getattr(f, a))
                        # Ok couldn't find it anywhere
                        # setting to N/A
                        else:
                            vals.append("N/A")
                        f.close()
                descr[att] = fmt % tuple(vals)
            self.metrics_dictionary["RESULTS"][test.obs_or_model]["units"] = \
                getattr(test_data, "units", "N/A")
            self.metrics_dictionary["RESULTS"][
                test.obs_or_model]["SimulationDescription"] = descr
            self.metrics_dictionary["RESULTS"][test.obs_or_model]["InputClimatologyFileName"] = \
                os.path.basename(test.file_path())
            self.metrics_dictionary["RESULTS"][
                test.obs_or_model]["InputClimatologyMD5"] = test.hash()
            # Not just global
            # TODO Ask Charles if the below check is needed
            # if len(self.regions_dict[self.var]) > 1:
            self.metrics_dictionary["RESULTS"][test.obs_or_model][
                "InputRegionFileName"] = \
                self.sftlf[test.obs_or_model]["filename"]
            self.metrics_dictionary["RESULTS"][test.obs_or_model][
                "InputRegionMD5"] = \
                self.sftlf[test.obs_or_model]["md5"]

    def output_interpolated_model_climatologies(self, test, test_data):
        '''Save the netCDF file.'''
        region_name = self.get_region_name_from_region(test.region)
        pth = os.path.join(self.parameter.test_clims_interpolated_output,
                           region_name)
        clim_file = Base(pth, self.parameter.filename_output_template)
        logging.getLogger("pcmdi_metrics").info(
            'Saving interpolated climatologies to: %s' % clim_file())
        # Fill the filename-template keys before writing.
        clim_file.level = self.out_file.level
        clim_file.model_version = test.obs_or_model
        clim_file.table = self.table_realm
        clim_file.period = self.parameter.period
        clim_file.case_id = self.parameter.case_id
        clim_file.set_target_grid(self.parameter.target_grid,
                                  self.regrid_tool,
                                  self.regrid_method)
        clim_file.variable = self.var
        clim_file.region = region_name
        clim_file.realization = self.parameter.realization
        DataSet.apply_custom_keys(clim_file, self.parameter.custom_keys,
                                  self.var)
        clim_file.write(test_data, type="nc", id=self.var)

    def get_region_name_from_region(self, region):
        '''Extract the region name from the region dict.'''
        # region is both in ref and test
        region_name = region['id']
        # NOTE(review): dead guard -- if region were None, the subscript
        # above would already have raised TypeError; the None check should
        # come first for the 'global' fallback to ever trigger.
        if region is None:
            region_name = 'global'
        return region_name

    def check_save_test_clim(self, ref):
        '''Bunch of checks to see if the netCDF files are needed to be
        saved.'''
        # Since we are only saving once per reference data set (it's always
        # the same after), we need to check if ref is the first value from the
        # parameter, hence we have ref.obs_or_model == reference_data_set[0]
        reference_data_set = self.parameter.reference_data_set
        reference_data_set = Observation.setup_obs_list_from_parameter(
            reference_data_set, self.obs_dict, self.var)
        return not self.parameter.dry_run and hasattr(self.parameter, 'save_test_clims') \
            and self.parameter.save_test_clims is True and ref.obs_or_model == reference_data_set[0]  # noqa

    def write_on_exit(self):
        '''Output the metrics_dictionary as a json and text file.'''
        # Re-run setup so any attributes updated since construction are
        # reflected in the output filename.
        self.setup_out_file()
        self.metrics_dictionary['METRICS'] = self.metrics_def_dictionary
        if len(self.metrics_def_dictionary) == 0:
            raise RuntimeError("No results generated, cannot write to file")
        if not self.parameter.dry_run:
            logging.getLogger("pcmdi_metrics").info('Saving results to: %s' %
                                                    self.out_file())
            # mode="r+" presumably merges into an existing results file
            # rather than truncating it -- TODO confirm against Base.write.
            self.out_file.write(self.metrics_dictionary,
                                json_structure=[
                                    "model", "reference", "rip", "region",
                                    "statistic", "season"
                                ],
                                indent=4,
                                separators=(',', ': '),
                                mode="r+")
def create_sftlf(parameter):
    """Build the land/sea-mask (sftlf) lookup described by *parameter*.

    Returns a dict keyed by model name, each value holding the raw mask plus
    the source filename and its md5; the special key 'target_grid' holds a
    mask generated for the target grid itself.
    """
    sftlf = {}
    for model in parameter.test_data_set:
        template = getattr(parameter, "sftlf_filename_template")
        if template is None:  # Not defined from commandline or param file
            template = parameter.filename_template
        mask_file = Base(parameter.test_data_path, template)
        mask_file.model_version = model
        mask_file.table = "fx"
        mask_file.realm = "atmos"
        mask_file.period = getattr(parameter, "period", "")
        mask_file.ext = "nc"
        mask_file.case_id = getattr(parameter, "case_id", "")
        mask_file.target_grid = None
        mask_file.realization = "r0i0p0"
        DataSet.apply_custom_keys(mask_file, parameter.custom_keys, "sftlf")
        # Best effort: a model whose sftlf cannot be read gets None entries.
        try:
            sftlf[model] = {
                "raw": mask_file.get("sftlf"),
                "filename": os.path.basename(mask_file()),
                "md5": mask_file.hash(),
            }
        except Exception:
            sftlf[model] = {"raw": None, "filename": None, "md5": None}
    if parameter.target_grid == "2.5x2.5":
        target = cdms2.createUniformGrid(-88.875, 72, 2.5, 0, 144, 2.5)
    else:
        target = parameter.target_grid
    generated = cdutil.generateLandSeaMask(target)
    generated[:] = generated.filled(1.0) * 100.0
    sftlf["target_grid"] = generated
    return sftlf
class Model(pcmdi_metrics.driver.dataset.DataSet):
    """Handles all the computation (setting masking, target grid, etc)
    and some file I/O related to models."""

    def __init__(self, parameter, var_name_long, region, model, obs_dict,
                 data_path, sftlf):
        # model: the model-version name used to key the sftlf lookup and
        # fill the filename template.
        super(Model, self).__init__(parameter, var_name_long, region,
                                    obs_dict, data_path, sftlf)
        logging.getLogger("pcmdi_metrics").setLevel(LOG_LEVEL)
        self._model_file = None
        self.var_in_file = None
        self.obs_or_model = model
        # Order matters: the file object must exist before the grid and
        # mask can be attached to it.
        self.create_model_file()
        self.setup_target_grid(self._model_file)
        self.setup_target_mask()

    def create_model_file(self):
        """Creates an object that will eventually output the netCDF file."""
        self._model_file = Base(self.data_path,
                                self.parameter.filename_template)
        self._model_file.variable = self.var
        self._model_file.model_version = self.obs_or_model
        self._model_file.period = self.parameter.period
        self._model_file.ext = "nc"
        self._model_file.case_id = self.parameter.case_id
        self._model_file.realization = self.parameter.realization
        self.apply_custom_keys(self._model_file, self.parameter.custom_keys,
                               self.var)

    def setup_target_mask(self):
        """Sets the mask and target_mask attribute of self._model_file"""
        self.var_in_file = self.get_var_in_file()
        if self.region is not None:
            region_value = self.region.get("value", None)
            if region_value is not None:
                # Auto-generate the model's raw mask if it was not found.
                if self.sftlf[self.obs_or_model]["raw"] is None:
                    self.create_sftlf_model_raw(self.var_in_file)
                self._model_file.mask = self.sftlf[self.obs_or_model]["raw"]
                self._model_file.target_mask = MV2.not_equal(
                    self.sftlf["target_grid"], region_value)

    def get(self):
        """Gets the variable based on the region and level (if given) for
        the file from data_path, which is defined in the initalizer."""
        try:
            if self.level is None:
                data_model = self._model_file.get(self.var,
                                                  var_in_file=self.var_in_file,
                                                  region=self.region)
            else:
                data_model = self._model_file.get(
                    self.var,
                    var_in_file=self.var_in_file,
                    level=self.level,
                    region=self.region,
                )
            return data_model
        except Exception as e:
            # Any failure is logged and converted to the driver's
            # skip-this-model signal.
            msg = "Failed to get variables %s for versions: %s, error: %s"
            logging.getLogger("pcmdi_metrics").error(
                msg % (self.var, self.obs_or_model, e))
            raise RuntimeError("Need to skip model: %s" % self.obs_or_model)

    def get_var_in_file(self):
        """Based off the model_tweaks parameter, get the variable mapping."""
        # Per-model tweaks win; the None key holds tweaks for all models.
        tweaks = {}
        tweaks_all = {}
        if hasattr(self.parameter, "model_tweaks"):
            tweaks = self.parameter.model_tweaks.get(self.obs_or_model, {})
            tweaks_all = self.parameter.model_tweaks.get(None, {})
        var_in_file = tweaks.get("variable_mapping", {}).get(self.var, None)
        if var_in_file is None:
            if hasattr(self.parameter, "model_tweaks"):
                tweaks_all = self.parameter.model_tweaks.get(None, {})
            # Fall back to the variable's own name when no mapping exists.
            var_in_file = tweaks_all.get("variable_mapping",
                                         {}).get(self.var, self.var)
        return var_in_file

    def create_sftlf_model_raw(self, var_in_file):
        """For the self.obs_or_model from the initializer, create a
        landSeaMask from cdutil for self.sftlf[self.obs_or_model]['raw']
        value."""
        # Only generate a mask when the parameter file explicitly opts in.
        if (not hasattr(self.parameter, "generate_sftlf") or
                self.parameter.generate_sftlf is False):
            logging.getLogger("pcmdi_metrics").info(
                "Model %s does not have sftlf, skipping region: %s" %
                (self.obs_or_model, self.region))
            raise RuntimeError(
                "Model %s does not have sftlf, skipping region: %s" %
                (self.obs_or_model, self.region))
        else:
            logging.getLogger("pcmdi_metrics").info(
                "Auto generating sftlf for model %s" % self._model_file())
            # NOTE(review): if the model file does not exist, this silently
            # leaves the raw mask as None -- confirm that is intended.
            if os.path.exists(self._model_file()):
                var_file = cdms2.open(self._model_file())
                var = var_file[var_in_file]
                n = var.rank() - 2  # Minus lat and long
                # Generate the mask from a single leading-index slice, then
                # scale the 0..1 fraction to a 0..100 percentage.
                sft = cdutil.generateLandSeaMask(
                    var(*(slice(0, 1), ) * n)) * 100.0
                sft[:] = sft.filled(100.0)
                self.sftlf[self.obs_or_model]["raw"] = sft
                var_file.close()
                logging.getLogger("pcmdi_metrics").info(
                    "Auto generated sftlf for model %s" % self.obs_or_model)

    def hash(self):
        """Return a hash of the file."""
        return self._model_file.hash()

    def file_path(self):
        """Return the path of the file."""
        return self._model_file()
class Model(pcmdi_metrics.driver.dataset.DataSet):
    ''' Handles all the computation (setting masking, target grid, etc)
    and some file I/O related to models. '''

    def __init__(self, parameter, var_name_long, region, model, obs_dict,
                 data_path, sftlf):
        # model: the model-version name used to key the sftlf lookup and
        # fill the filename template.
        super(Model, self).__init__(parameter, var_name_long, region,
                                    obs_dict, data_path, sftlf)
        # NOTE(review): this configures the *root* logger as a constructor
        # side effect; sibling code in this file uses the named
        # "pcmdi_metrics" logger instead -- confirm which is intended.
        logging.basicConfig(level=logging.DEBUG)
        self._model_file = None
        self.var_in_file = None
        self.obs_or_model = model
        # Order matters: the file object must exist before the grid and
        # mask can be attached to it.
        self.create_model_file()
        self.setup_target_grid(self._model_file)
        self.setup_target_mask()

    def create_model_file(self):
        ''' Creates an object that will eventually output the
        netCDF file. '''
        self._model_file = Base(self.data_path,
                                self.parameter.filename_template)
        self._model_file.variable = self.var
        self._model_file.model_version = self.obs_or_model
        self._model_file.period = self.parameter.period
        self._model_file.ext = 'nc'
        self._model_file.case_id = self.parameter.case_id
        self._model_file.realization = self.parameter.realization
        self.apply_custom_keys(self._model_file, self.parameter.custom_keys,
                               self.var)

    def setup_target_mask(self):
        ''' Sets the mask and target_mask attribute of self._model_file '''
        self.var_in_file = self.get_var_in_file()
        if self.region is not None:
            region_value = self.region.get('value', None)
            if region_value is not None:
                # Auto-generate the model's raw mask if it was not found.
                if self.sftlf[self.obs_or_model]['raw'] is None:
                    self.create_sftlf_model_raw(self.var_in_file)
                self._model_file.mask = self.sftlf[self.obs_or_model]['raw']
                self._model_file.target_mask = \
                    MV2.not_equal(self.sftlf['target_grid'], region_value)

    def get(self):
        ''' Gets the variable based on the region and level (if given)
        for the file from data_path, which is defined in the
        initalizer. '''
        try:
            if self.level is None:
                data_model = self._model_file.get(self.var,
                                                  var_in_file=self.var_in_file,
                                                  region=self.region)
            else:
                data_model = self._model_file.get(self.var,
                                                  var_in_file=self.var_in_file,
                                                  level=self.level,
                                                  region=self.region)
            return data_model
        except Exception as e:
            # Any failure is logged and converted to the driver's
            # skip-this-model signal.
            msg = 'Failed to get variables %s for versions: %s, error: %s'
            logging.error(msg % (self.var, self.obs_or_model, e))
            raise RuntimeError('Need to skip model: %s' % self.obs_or_model)

    def get_var_in_file(self):
        ''' Based off the model_tweaks parameter, get the variable
        mapping. '''
        # Per-model tweaks win; the None key holds tweaks for all models.
        tweaks = {}
        tweaks_all = {}
        if hasattr(self.parameter, 'model_tweaks'):
            tweaks = self.parameter.model_tweaks.get(self.obs_or_model, {})
            tweaks_all = self.parameter.model_tweaks.get(None, {})
        var_in_file = tweaks.get('variable_mapping', {}).get(self.var, None)
        if var_in_file is None:
            if hasattr(self.parameter, 'model_tweaks'):
                tweaks_all = self.parameter.model_tweaks.get(None, {})
            # Fall back to the variable's own name when no mapping exists.
            var_in_file = tweaks_all.get('variable_mapping',
                                         {}).get(self.var, self.var)
        return var_in_file

    def create_sftlf_model_raw(self, var_in_file):
        ''' For the self.obs_or_model from the initializer, create a
        landSeaMask from cdutil for self.sftlf[self.obs_or_model]['raw']
        value. '''
        # Only generate a mask when the parameter file explicitly opts in.
        if not hasattr(self.parameter, 'generate_sftlf') or \
                self.parameter.generate_sftlf is False:
            logging.info('Model %s does not have sftlf, skipping region: %s'
                         % (self.obs_or_model, self.region))
            raise RuntimeError(
                'Model %s does not have sftlf, skipping region: %s'
                % (self.obs_or_model, self.region))
        else:
            logging.info('Auto generating sftlf for model %s'
                         % self._model_file())
            # NOTE(review): if the model file does not exist, this silently
            # leaves the raw mask as None -- confirm that is intended.
            if os.path.exists(self._model_file()):
                var_file = cdms2.open(self._model_file())
                var = var_file[var_in_file]
                n = var.rank() - 2  # Minus lat and long
                # Generate the mask from a single leading-index slice, then
                # scale the 0..1 fraction to a 0..100 percentage.
                sft = cdutil.generateLandSeaMask(
                    var(*(slice(0, 1), ) * n)) * 100.0
                sft[:] = sft.filled(100.0)
                self.sftlf[self.obs_or_model]['raw'] = sft
                var_file.close()
                logging.info('Auto generated sftlf for model %s'
                             % self.obs_or_model)

    def hash(self):
        ''' Return a hash of the file. '''
        return self._model_file.hash()

    def file_path(self):
        ''' Return the path of the file. '''
        return self._model_file()
def output_interpolated_model_climatologies(self, test, test_data):
    '''Persist the interpolated test climatology as a netCDF file.'''
    region_name = self.get_region_name_from_region(test.region)
    destination = os.path.join(self.parameter.test_clims_interpolated_output,
                               region_name)
    clim_file = Base(destination, self.parameter.filename_output_template)
    logging.getLogger("pcmdi_metrics").info(
        'Saving interpolated climatologies to: %s' % clim_file())
    # Populate the filename-template keys, then write the data.
    clim_file.level = self.out_file.level
    clim_file.model_version = test.obs_or_model
    clim_file.table = self.table_realm
    clim_file.period = self.parameter.period
    clim_file.case_id = self.parameter.case_id
    clim_file.set_target_grid(self.parameter.target_grid,
                              self.regrid_tool,
                              self.regrid_method)
    clim_file.variable = self.var
    clim_file.region = region_name
    clim_file.realization = self.parameter.realization
    DataSet.apply_custom_keys(clim_file, self.parameter.custom_keys, self.var)
    clim_file.write(test_data, type="nc", id=self.var)
class OutputMetrics(object):
    """Accumulates computed metrics into nested dictionaries and writes the
    results as JSON and text (plus interpolated climatologies as netCDF)."""

    def __init__(self, parameter, var_name_long, obs_dict, sftlf):
        # parameter: driver parameter object (paths, options, custom keys).
        # var_name_long: variable id, possibly with level suffix ('ta_850').
        # obs_dict: mapping of variable -> observation metadata.
        # sftlf: land/sea-mask lookup keyed by model name.
        logging.getLogger("pcmdi_metrics").setLevel(LOG_LEVEL)
        self.parameter = parameter
        self.var_name_long = var_name_long
        self.obs_dict = obs_dict
        # The bare variable id is everything before the first '_'.
        self.var = var_name_long.split('_')[0]
        self.sftlf = sftlf
        self.metrics_def_dictionary = {}
        self.metrics_dictionary = {}
        self.out_file = Base(self.parameter.metrics_output_path,
                             self.parameter.output_json_template)
        self.regrid_method = ''
        self.regrid_tool = ''
        self.table_realm = ''
        self.realm = ''
        # Order matters: regrid/realm attributes are read by setup_out_file().
        self.setup_regrid_and_realm_vars()
        self.setup_out_file()
        self.setup_metrics_dictionary()

    def setup_metrics_dictionary(self):
        '''Initialize the results dict (metrics_dictionary) and the metrics
        documentation dict (metrics_def_dictionary) which is put in the
        results dict.'''
        self.metrics_def_dictionary = collections.OrderedDict()
        self.metrics_dictionary = collections.OrderedDict()
        self.metrics_dictionary["DISCLAIMER"] = self.open_disclaimer()
        if self.parameter.user_notes is not None:
            self.metrics_dictionary["USER_NOTES"] = self.parameter.user_notes
        self.metrics_dictionary["RESULTS"] = collections.OrderedDict()
        self.metrics_dictionary["Variable"] = {}
        self.metrics_dictionary["Variable"]["id"] = self.var
        self.metrics_dictionary["json_version"] = '3.0'
        self.metrics_dictionary["References"] = {}
        self.metrics_dictionary["RegionalMasking"] = {}
        # A level suffix in var_name_long (e.g. 'ta_850') becomes both a
        # "Variable"/"level" entry and a filename fragment on out_file.
        level = DataSet.calculate_level_from_var(self.var_name_long)
        if level is None:
            self.out_file.level = ''
        else:
            self.metrics_dictionary["Variable"]["level"] = level
            # Filename fragment: level divided by 100 (presumably Pa -> hPa
            # -- TODO confirm the units convention).
            self.out_file.level = "-%i" % (int(level / 100.0))

    def open_disclaimer(self):
        '''Return the contents of disclaimer.txt.'''
        f = DataSet.load_path_as_file_obj('disclaimer.txt')
        contents = f.read()
        f.close()
        return contents

    def setup_regrid_and_realm_vars(self):
        '''Set the regrid_method, regrid_tool, table_realm, and realm based
        off the obs dict and var.'''
        if DataSet.use_omon(self.obs_dict, self.var):
            # Ocean variables use the ocean-specific regrid settings.
            self.regrid_method = self.parameter.regrid_method_ocn
            self.regrid_tool = self.parameter.regrid_tool_ocn
            self.table_realm = 'Omon'
            self.realm = "ocn"
        else:
            self.regrid_method = self.parameter.regrid_method
            self.regrid_tool = self.parameter.regrid_tool
            self.table_realm = 'Amon'
            self.realm = "atm"

    def setup_out_file(self):
        '''Setup for the out_file, which outputs both the .json and .txt.'''
        self.out_file.set_target_grid(
            self.parameter.target_grid, self.regrid_tool, self.regrid_method)
        self.out_file.variable = self.var
        self.out_file.realm = self.realm
        self.out_file.table = self.table_realm
        self.out_file.case_id = self.parameter.case_id
        DataSet.apply_custom_keys(self.out_file, self.parameter.custom_keys,
                                  self.var)

    def add_region(self, region):
        '''Add a region to the metrics_dictionary.'''
        self.metrics_dictionary['RegionalMasking'][
            self.get_region_name_from_region(region)] = region

    def calculate_and_output_metrics(self, ref, test):
        '''Given ref and test (both either of type Observation or Model),
        compute the metrics.'''
        # A string obs entry is an alias pointing at another obs entry.
        # NOTE(review): basestring is Python 2 only; under Python 3 this
        # needs str (or a compat shim) -- confirm the target interpreter.
        if isinstance(self.obs_dict[self.var][ref.obs_or_model], basestring):
            self.obs_var_ref = self.obs_dict[self.var][
                self.obs_dict[self.var][ref.obs_or_model]]
        else:
            self.obs_var_ref = self.obs_dict[self.var][ref.obs_or_model]
        self.metrics_dictionary['References'][
            ref.obs_or_model] = self.obs_var_ref
        try:
            ref_data = ref()
        except Exception as e:
            msg = 'Error while processing observation %s for variables %s:\n\t%s'
            logging.getLogger("pcmdi_metrics").error(
                msg % (ref.obs_or_model, self.var, str(e)))
        # NOTE(review): if ref() raised above, ref_data is unbound here and
        # this check raises NameError instead of the intended RuntimeError.
        if ref_data is None:
            # Something went bad!
            raise RuntimeError(
                'Could not load reference {}'.format(ref.obs_or_model))
        try:
            test_data = test()
        except RuntimeError:
            # THIS EXCEPTION IS RAISED TO BREAK OUT OF THE FOR LOOP IN PCMDI_DRIVER
            # THIS SHOULD BE A CUSTOM EXCEPTION (PrematureBreakError)
            raise RuntimeError('Need to skip model: %s' % test.obs_or_model)
        # Todo: Make this a fcn
        self.set_grid_in_metrics_dictionary(test_data)
        if ref_data.shape != test_data.shape:
            raise RuntimeError(
                'Two data sets have different shapes. %s vs %s'
                % (ref_data.shape, test_data.shape))
        self.set_simulation_desc(test, test_data)
        # Ensure a per-(model, obs) results slot exists before filling it.
        if ref.obs_or_model not in self.metrics_dictionary['RESULTS'][
                test.obs_or_model]:
            self.metrics_dictionary["RESULTS"][test.obs_or_model][ref.obs_or_model] = \
                {'source': self.obs_dict[self.var][ref.obs_or_model]}
        parameter_realization = self.metrics_dictionary["RESULTS"][test.obs_or_model][ref.obs_or_model].\
            get(self.parameter.realization, {})
        if not self.parameter.dry_run:
            pr_rgn = pcmdi_metrics.pcmdi.compute_metrics(self.var_name_long,
                                                         test_data, ref_data)
            # Calling compute_metrics with None for the model and obs returns
            # the definitions.
            self.metrics_def_dictionary.update(
                pcmdi_metrics.pcmdi.compute_metrics(self.var_name_long,
                                                    None, None))
            if hasattr(self.parameter, 'compute_custom_metrics'):
                pr_rgn.update(
                    self.parameter.compute_custom_metrics(self.var_name_long,
                                                          test_data,
                                                          ref_data))
                # Best effort: fall back to the callable's docstring when the
                # custom-metrics definitions cannot be computed.
                try:
                    self.metrics_def_dictionary.update(
                        self.parameter.compute_custom_metrics(
                            self.var_name_long, None, None))
                except Exception:
                    self.metrics_def_dictionary.update(
                        {'custom': self.parameter.compute_custom_metrics.__doc__})
            # Store the per-region metrics sorted by key for stable output.
            parameter_realization[self.get_region_name_from_region(ref.region)] = collections.OrderedDict(
                (k, pr_rgn[k]) for k in sorted(pr_rgn.keys())
            )
            self.metrics_dictionary['RESULTS'][test.obs_or_model][ref.obs_or_model][self.parameter.realization] = \
                parameter_realization
        if self.check_save_test_clim(ref):
            self.output_interpolated_model_climatologies(test, test_data)
        self.write_on_exit()

    def set_grid_in_metrics_dictionary(self, test_data):
        '''Set the grid in metrics_dictionary.'''
        grid = {}
        grid['RegridMethod'] = self.regrid_method
        grid['RegridTool'] = self.regrid_tool
        grid['GridName'] = self.parameter.target_grid
        # First axis is dropped -- presumably the time/season axis; the
        # remaining axes describe the spatial resolution. TODO confirm.
        grid['GridResolution'] = test_data.shape[1:]
        self.metrics_dictionary['GridInfo'] = grid

    def set_simulation_desc(self, test, test_data):
        '''Fillout information for the output .json and .txt files.'''
        self.metrics_dictionary["RESULTS"][test.obs_or_model] = \
            self.metrics_dictionary["RESULTS"].get(test.obs_or_model, {})
        # Only build the description once per model.
        if "SimulationDescription" not in \
                self.metrics_dictionary["RESULTS"][test.obs_or_model]:
            descr = {"MIPTable": self.obs_var_ref["CMIP_CMOR_TABLE"],
                     "Model": test.obs_or_model,
                     }
            # Each description field maps to either an attribute name or a
            # [format, attr, ...] list; parameter file wins over file attrs.
            sim_descr_mapping = {
                "ModelActivity": "project_id",
                "ModellingGroup": "institute_id",
                "Experiment": "experiment",
                "ModelFreeSpace": "ModelFreeSpace",
                "Realization": "realization",
                "creation_date": "creation_date",
            }
            sim_descr_mapping.update(
                getattr(self.parameter, "simulation_description_mapping", {}))
            for att in list(sim_descr_mapping.keys()):
                nm = sim_descr_mapping[att]
                if not isinstance(nm, (list, tuple)):
                    nm = ["%s", nm]
                fmt = nm[0]
                vals = []
                for a in nm[1:]:
                    # First trying from parameter file
                    if hasattr(self.parameter, a):
                        vals.append(getattr(self.parameter, a))
                    # Now fall back on file...
                    else:
                        f = cdms2.open(test.file_path())
                        if hasattr(f, a):
                            # Prefer a numeric value when it parses as float.
                            try:
                                vals.append(float(getattr(f, a)))
                            except Exception:
                                vals.append(getattr(f, a))
                        # Ok couldn't find it anywhere
                        # setting to N/A
                        else:
                            vals.append("N/A")
                        f.close()
                descr[att] = fmt % tuple(vals)
            self.metrics_dictionary["RESULTS"][test.obs_or_model]["units"] = \
                getattr(test_data, "units", "N/A")
            self.metrics_dictionary["RESULTS"][
                test.obs_or_model]["SimulationDescription"] = descr
            self.metrics_dictionary["RESULTS"][test.obs_or_model]["InputClimatologyFileName"] = \
                os.path.basename(test.file_path())
            self.metrics_dictionary["RESULTS"][
                test.obs_or_model]["InputClimatologyMD5"] = test.hash()
            # Not just global
            # TODO Ask Charles if the below check is needed
            # if len(self.regions_dict[self.var]) > 1:
            self.metrics_dictionary["RESULTS"][test.obs_or_model][
                "InputRegionFileName"] = \
                self.sftlf[test.obs_or_model]["filename"]
            self.metrics_dictionary["RESULTS"][test.obs_or_model][
                "InputRegionMD5"] = \
                self.sftlf[test.obs_or_model]["md5"]

    def output_interpolated_model_climatologies(self, test, test_data):
        '''Save the netCDF file.'''
        region_name = self.get_region_name_from_region(test.region)
        pth = os.path.join(self.parameter.test_clims_interpolated_output,
                           region_name)
        clim_file = Base(pth, self.parameter.filename_output_template)
        logging.getLogger("pcmdi_metrics").info(
            'Saving interpolated climatologies to: %s' % clim_file())
        # Fill the filename-template keys before writing.
        clim_file.level = self.out_file.level
        clim_file.model_version = test.obs_or_model
        clim_file.table = self.table_realm
        clim_file.period = self.parameter.period
        clim_file.case_id = self.parameter.case_id
        clim_file.set_target_grid(
            self.parameter.target_grid, self.regrid_tool, self.regrid_method)
        clim_file.variable = self.var
        clim_file.region = region_name
        clim_file.realization = self.parameter.realization
        DataSet.apply_custom_keys(clim_file, self.parameter.custom_keys,
                                  self.var)
        clim_file.write(test_data, type="nc", id=self.var)

    def get_region_name_from_region(self, region):
        '''Extract the region name from the region dict.'''
        # region is both in ref and test
        region_name = region['id']
        # NOTE(review): dead guard -- if region were None, the subscript
        # above would already have raised TypeError; the None check should
        # come first for the 'global' fallback to ever trigger.
        if region is None:
            region_name = 'global'
        return region_name

    def check_save_test_clim(self, ref):
        '''Bunch of checks to see if the netCDF files are needed to be
        saved.'''
        # Since we are only saving once per reference data set (it's always
        # the same after), we need to check if ref is the first value from the
        # parameter, hence we have ref.obs_or_model == reference_data_set[0]
        reference_data_set = self.parameter.reference_data_set
        reference_data_set = Observation.setup_obs_list_from_parameter(
            reference_data_set, self.obs_dict, self.var)
        return not self.parameter.dry_run and hasattr(self.parameter, 'save_test_clims') \
            and self.parameter.save_test_clims is True and ref.obs_or_model == reference_data_set[0]  # noqa

    def write_on_exit(self):
        '''Output the metrics_dictionary as a json and text file.'''
        self.metrics_dictionary['METRICS'] = self.metrics_def_dictionary
        if len(self.metrics_def_dictionary) == 0:
            raise RuntimeError("No results generated, cannot write to file")
        if not self.parameter.dry_run:
            logging.getLogger("pcmdi_metrics").info('Saving results to: %s' %
                                                    self.out_file())
            # Write the JSON results, then a plain-text rendering of the
            # same dictionary.
            self.out_file.write(self.metrics_dictionary,
                                json_structure=["model", "reference", "rip",
                                                "region", "statistic",
                                                "season"],
                                indent=4,
                                separators=(',', ': '))
            self.out_file.write(self.metrics_dictionary, type='txt')
def create_sftlf(parameter):
    ''' Create the sftlf file from the parameter. '''
    # Returns a dict keyed by model name, each value holding the raw mask
    # plus the source filename and its md5; the special key 'target_grid'
    # holds a mask generated for the target grid itself.
    sftlf = {}
    for test in parameter.test_data_set:
        # NOTE(review): no default here -- assumes sftlf_filename_template
        # always exists on the parameter object (possibly as None).
        tmp_name = getattr(parameter, "sftlf_filename_template")
        if tmp_name is None:  # Not defined from commandline or param file
            tmp_name = parameter.filename_template
        sft = Base(parameter.test_data_path, tmp_name)
        # Fill the filename-template keys for the fixed-field mask file.
        sft.model_version = test
        sft.table = "fx"
        sft.realm = "atmos"
        sft.period = getattr(parameter, 'period', '')
        sft.ext = "nc"
        sft.case_id = getattr(parameter, 'case_id', '')
        sft.target_grid = None
        sft.realization = "r0i0p0"
        DataSet.apply_custom_keys(sft, parameter.custom_keys, "sftlf")
        # Best effort: a model whose sftlf cannot be read gets None entries.
        try:
            sftlf[test] = {"raw": sft.get("sftlf")}
            sftlf[test]["filename"] = os.path.basename(sft())
            sftlf[test]["md5"] = sft.hash()
        except Exception:
            sftlf[test] = {"raw": None}
            sftlf[test]["filename"] = None
            sftlf[test]["md5"] = None
    if parameter.target_grid == "2.5x2.5":
        t_grid = cdms2.createUniformGrid(-88.875, 72, 2.5, 0, 144, 2.5)
    else:
        t_grid = parameter.target_grid
    # Generate a mask on the target grid and scale the 0..1 fraction to a
    # 0..100 percentage.
    sft = cdutil.generateLandSeaMask(t_grid)
    sft[:] = sft.filled(1.0) * 100.0
    sftlf["target_grid"] = sft
    return sftlf