Example #1
    def create_model_file(self):
        """Creates an object that will eventually output the netCDF file."""
        self._model_file = Base(self.data_path,
                                self.parameter.filename_template)
        self._model_file.variable = self.var
        self._model_file.model_version = self.obs_or_model
        self._model_file.period = self.parameter.period
        self._model_file.ext = "nc"
        self._model_file.case_id = self.parameter.case_id
        self._model_file.realization = self.parameter.realization
        self.apply_custom_keys(self._model_file, self.parameter.custom_keys,
                               self.var)
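
For context, the Base object configured above is a filename template: each %(key) placeholder is resolved from an attribute set on the object, and calling the object renders the final path (see sft() and clim_file() in the later examples). A minimal sketch follows; the import path and every value are assumptions for illustration, not taken from the snippet above.

from pcmdi_metrics.io.base import Base  # assumed import path for Base

# Hypothetical template and values; only the Base(path, template) /
# attribute-assignment / call pattern comes from the example above.
model_file = Base("/data/climatologies",
                  "%(variable)_%(model_version)_%(period)_%(realization).%(ext)")
model_file.variable = "tas"
model_file.model_version = "MODEL-X"
model_file.period = "1981-2005"
model_file.realization = "r1i1p1"
model_file.ext = "nc"
print(model_file())  # calling the object renders the template into a concrete file path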
Example #2
    def __init__(self, parameter, var_name_long, obs_dict, sftlf):
        logging.getLogger("pcmdi_metrics").setLevel(LOG_LEVEL)
        self.parameter = parameter
        self.var_name_long = var_name_long
        self.obs_dict = obs_dict
        self.var = var_name_long.split('_')[0]
        self.sftlf = sftlf

        self.metrics_def_dictionary = {}
        self.metrics_dictionary = {}

        self.out_file = Base(self.parameter.metrics_output_path, self.parameter.output_json_template)

        self.regrid_method = ''
        self.regrid_tool = ''
        self.table_realm = ''
        self.realm = ''
        self.setup_regrid_and_realm_vars()
        self.setup_out_file()
        self.setup_metrics_dictionary()
Example #3
    def __init__(self, parameter, var_name_long, obs_dict, sftlf):
        logging.basicConfig(level=logging.DEBUG)
        self.parameter = parameter
        self.var_name_long = var_name_long
        self.obs_dict = obs_dict
        self.var = var_name_long.split('_')[0]
        self.sftlf = sftlf

        self.metrics_def_dictionary = {}
        self.metrics_dictionary = {}

        string_template = "%(variable)%(level)_%(target_grid_name)_" +\
                          "%(regrid_tool)_%(regrid_method)_metrics"
        self.out_file = Base(self.parameter.metrics_output_path,
                             string_template)

        self.regrid_method = ''
        self.regrid_tool = ''
        self.table_realm = ''
        self.realm = ''
        self.setup_regrid_and_realm_vars()
        self.setup_out_file()
        self.setup_metrics_dictionary()
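
The hard-coded template above is resolved from attributes that setup_out_file() and setup_metrics_dictionary() set later (variable, level, target grid, regrid tool and method). As a rough illustration with plain Python %-formatting, which unlike the template above needs a trailing 's' on each placeholder; all values are made up:

# Illustrative only: shows roughly what filename the template resolves to.
template = "%(variable)s%(level)s_%(target_grid_name)s_%(regrid_tool)s_%(regrid_method)s_metrics"
print(template % {
    "variable": "ta",
    "level": "-850",               # setup_metrics_dictionary formats level as "-%i" % int(level / 100.0)
    "target_grid_name": "2.5x2.5",
    "regrid_tool": "esmf",         # hypothetical regrid settings
    "regrid_method": "linear",
})
# ta-850_2.5x2.5_esmf_linear_metrics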
Example #4
    def __init__(self, parameter, var_name_long, obs_dict, sftlf):
        logging.getLogger("pcmdi_metrics").setLevel(LOG_LEVEL)
        self.parameter = parameter
        self.var_name_long = var_name_long
        self.obs_dict = obs_dict
        self.var = var_name_long.split('_')[0]
        self.sftlf = sftlf

        self.metrics_def_dictionary = {}
        self.metrics_dictionary = {}

        self.out_file = Base(self.parameter.metrics_output_path, self.parameter.output_json_template)

        self.regrid_method = ''
        self.regrid_tool = ''
        self.table_realm = ''
        self.realm = ''
        self.setup_regrid_and_realm_vars()
        self.setup_out_file()
        self.setup_metrics_dictionary()
Example #5
    def output_interpolated_model_climatologies(self, test, test_data):
        ''' Save the netCDF file. '''
        region_name = self.get_region_name_from_region(test.region)
        pth = os.path.join(self.parameter.test_clims_interpolated_output,
                           region_name)
        clim_file = Base(pth, self.parameter.filename_output_template)
        logging.getLogger("pcmdi_metrics").info(
            'Saving interpolated climatologies to: %s' % clim_file())
        clim_file.level = self.out_file.level
        clim_file.model_version = test.obs_or_model

        clim_file.table = self.table_realm
        clim_file.period = self.parameter.period
        clim_file.case_id = self.parameter.case_id
        clim_file.set_target_grid(self.parameter.target_grid, self.regrid_tool,
                                  self.regrid_method)
        clim_file.variable = self.var
        clim_file.region = region_name
        clim_file.realization = self.parameter.realization
        DataSet.apply_custom_keys(clim_file, self.parameter.custom_keys,
                                  self.var)
        clim_file.write(test_data, type="nc", id=self.var)
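
The region handled here (and by get_region_name_from_region and setup_target_mask in the later examples) is a plain dictionary, or None for the global case. A self-contained sketch of the expected shape, with a made-up id and threshold:

# Sketch of the region dictionaries the examples expect; the id and the
# threshold value are placeholders, not pcmdi_metrics defaults.
land_region = {"id": "land", "value": 100}   # "value" is compared against sftlf land percentages
global_region = None                         # a missing region is treated as "global"

def region_name(region):
    # mirrors the intent of get_region_name_from_region in the later examples
    return "global" if region is None else region["id"]

print(region_name(land_region), region_name(global_region))  # land global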
Example #6
class OutputMetrics(object):
    def __init__(self, parameter, var_name_long, obs_dict, sftlf):
        logging.getLogger("pcmdi_metrics").setLevel(LOG_LEVEL)
        self.parameter = parameter
        self.var_name_long = var_name_long
        self.obs_dict = obs_dict
        self.var = var_name_long.split('_')[0]
        self.sftlf = sftlf

        self.metrics_def_dictionary = {}
        self.metrics_dictionary = {}

        self.out_file = Base(self.parameter.metrics_output_path,
                             self.parameter.output_json_template)

        self.regrid_method = ''
        self.regrid_tool = ''
        self.table_realm = ''
        self.realm = ''
        self.setup_regrid_and_realm_vars()
        self.setup_out_file()
        self.setup_metrics_dictionary()

    def setup_metrics_dictionary(self):
        ''' Initialize the results dict (metrics_dictionary) and the metrics documentation
        dict (metrics_def_dictionary) which is put in the results dict. '''
        self.metrics_def_dictionary = collections.OrderedDict()
        self.metrics_dictionary = collections.OrderedDict()
        self.metrics_dictionary["DISCLAIMER"] = self.open_disclaimer()
        if self.parameter.user_notes is not None:
            self.metrics_dictionary["USER_NOTES"] = self.parameter.user_notes
        self.metrics_dictionary["RESULTS"] = collections.OrderedDict()

        self.metrics_dictionary["Variable"] = {}
        self.metrics_dictionary["Variable"]["id"] = self.var
        self.metrics_dictionary["json_version"] = '3.0'
        self.metrics_dictionary["References"] = {}
        self.metrics_dictionary["RegionalMasking"] = {}

        level = DataSet.calculate_level_from_var(self.var_name_long)
        if level is None:
            self.out_file.level = ''
        else:
            self.metrics_dictionary["Variable"]["level"] = level
            self.out_file.level = "-%i" % (int(level / 100.0))

    def open_disclaimer(self):
        ''' Return the contents of disclaimer.txt. '''
        f = DataSet.load_path_as_file_obj('disclaimer.txt')
        contents = f.read()
        f.close()
        return contents

    def setup_regrid_and_realm_vars(self):
        ''' Set the regrid_method, regrid_tool, table_realm,
        and realm based on the obs dict and var. '''
        if DataSet.use_omon(self.obs_dict, self.var):
            self.regrid_method = self.parameter.regrid_method_ocn
            self.regrid_tool = self.parameter.regrid_tool_ocn
            self.table_realm = 'Omon'
            self.realm = "ocn"
        else:
            self.regrid_method = self.parameter.regrid_method
            self.regrid_tool = self.parameter.regrid_tool
            self.table_realm = 'Amon'
            self.realm = "atm"

    def setup_out_file(self):
        ''' Setup for the out_file, which outputs both the .json and .txt. '''
        self.out_file.set_target_grid(self.parameter.target_grid,
                                      self.regrid_tool, self.regrid_method)
        self.out_file.variable = self.var
        self.out_file.realm = self.realm
        self.out_file.table = self.table_realm
        self.out_file.case_id = self.parameter.case_id
        if hasattr(self, "obs_or_model"):
            self.out_file.model_version = self.obs_or_model
        for key in self.out_file.keys():
            if hasattr(self.parameter, key):
                setattr(self.out_file, key, getattr(self.parameter, key))
            if hasattr(self, key):
                setattr(self.out_file, key, getattr(self, key))

        DataSet.apply_custom_keys(self.out_file, self.parameter.custom_keys,
                                  self.var)

    def add_region(self, region):
        ''' Add a region to the metrics_dictionary. '''
        self.metrics_dictionary['RegionalMasking'][
            self.get_region_name_from_region(region)] = region

    def calculate_and_output_metrics(self, ref, test):
        ''' Given ref and test (each either an Observation or a Model), compute the metrics. '''
        if isinstance(self.obs_dict[self.var][ref.obs_or_model], basestring):
            self.obs_var_ref = self.obs_dict[self.var][self.obs_dict[self.var][
                ref.obs_or_model]]
        else:
            self.obs_var_ref = self.obs_dict[self.var][ref.obs_or_model]

        self.metrics_dictionary['References'][
            ref.obs_or_model] = self.obs_var_ref

        ref_data = None  # ensure the name is defined if ref() raises
        try:
            ref_data = ref()
        except Exception as e:
            msg = 'Error while processing observation %s for variables %s:\n\t%s'
            logging.getLogger("pcmdi_metrics").error(
                msg % (ref.obs_or_model, self.var, str(e)))

        if ref_data is None:  # Something went bad!
            raise RuntimeError('Could not load reference {}'.format(
                ref.obs_or_model))

        try:
            test_data = test()
        except RuntimeError:
            # THIS EXCEPTION IS RAISED TO BREAK OUT OF THE FOR LOOP IN PCMDI_DRIVER
            # THIS SHOULD BE A CUSTOM EXCEPTION (PrematureBreakError)
            raise RuntimeError('Need to skip model: %s' % test.obs_or_model)

        # Todo: Make this a fcn
        self.set_grid_in_metrics_dictionary(test_data)

        if ref_data.shape != test_data.shape:
            raise RuntimeError(
                'Two data sets have different shapes. %s vs %s' %
                (ref_data.shape, test_data.shape))

        self.set_simulation_desc(test, test_data)

        if ref.obs_or_model not in self.metrics_dictionary['RESULTS'][
                test.obs_or_model]:
            self.metrics_dictionary["RESULTS"][test.obs_or_model][ref.obs_or_model] = \
                {'source': self.obs_dict[self.var][ref.obs_or_model]}

        parameter_realization = self.metrics_dictionary["RESULTS"][test.obs_or_model][ref.obs_or_model].\
            get(self.parameter.realization, {})

        if not self.parameter.dry_run:
            pr_rgn = pcmdi_metrics.pcmdi.compute_metrics(
                self.var_name_long, test_data, ref_data)

            # Calling compute_metrics with None for the model and obs returns
            # the definitions.
            self.metrics_def_dictionary.update(
                pcmdi_metrics.pcmdi.compute_metrics(self.var_name_long, None,
                                                    None))
            if hasattr(self.parameter, 'compute_custom_metrics'):
                pr_rgn.update(
                    self.parameter.compute_custom_metrics(
                        self.var_name_long, test_data, ref_data))
                try:
                    self.metrics_def_dictionary.update(
                        self.parameter.compute_custom_metrics(
                            self.var_name_long, None, None))
                except Exception:
                    self.metrics_def_dictionary.update({
                        'custom':
                        self.parameter.compute_custom_metrics.__doc__
                    })

            parameter_realization[self.get_region_name_from_region(
                ref.region)] = collections.OrderedDict(
                    (k, pr_rgn[k]) for k in sorted(pr_rgn.keys()))

            self.metrics_dictionary['RESULTS'][test.obs_or_model][ref.obs_or_model][self.parameter.realization] = \
                parameter_realization

        if self.check_save_test_clim(ref):
            self.output_interpolated_model_climatologies(test, test_data)

        self.write_on_exit()

    def set_grid_in_metrics_dictionary(self, test_data):
        ''' Set the grid in metrics_dictionary. '''
        grid = {}
        grid['RegridMethod'] = self.regrid_method
        grid['RegridTool'] = self.regrid_tool
        grid['GridName'] = self.parameter.target_grid
        grid['GridResolution'] = test_data.shape[1:]
        self.metrics_dictionary['GridInfo'] = grid

    def set_simulation_desc(self, test, test_data):
        ''' Fill out information for the output .json and .txt files. '''
        self.metrics_dictionary["RESULTS"][test.obs_or_model] = \
            self.metrics_dictionary["RESULTS"].get(test.obs_or_model, {})
        if "SimulationDescription" not in \
                self.metrics_dictionary["RESULTS"][test.obs_or_model]:

            descr = {
                "MIPTable": self.obs_var_ref["CMIP_CMOR_TABLE"],
                "Model": test.obs_or_model,
            }
            sim_descr_mapping = {
                "ModelActivity": "project_id",
                "ModellingGroup": "institute_id",
                "Experiment": "experiment",
                "ModelFreeSpace": "ModelFreeSpace",
                "Realization": "realization",
                "creation_date": "creation_date",
            }
            sim_descr_mapping.update(
                getattr(self.parameter, "simulation_description_mapping", {}))

            for att in list(sim_descr_mapping.keys()):
                nm = sim_descr_mapping[att]
                if not isinstance(nm, (list, tuple)):
                    nm = ["%s", nm]
                fmt = nm[0]
                vals = []
                for a in nm[1:]:
                    # First trying from parameter file
                    if hasattr(self.parameter, a):
                        vals.append(getattr(self.parameter, a))
                    # Now fall back on file...
                    else:
                        f = cdms2.open(test.file_path())
                        if hasattr(f, a):
                            try:
                                vals.append(float(getattr(f, a)))
                            except Exception:
                                vals.append(getattr(f, a))
                        # Ok couldn't find it anywhere
                        # setting to N/A
                        else:
                            vals.append("N/A")
                        f.close()
                descr[att] = fmt % tuple(vals)

            self.metrics_dictionary["RESULTS"][test.obs_or_model]["units"] = \
                getattr(test_data, "units", "N/A")
            self.metrics_dictionary["RESULTS"][
                test.obs_or_model]["SimulationDescription"] = descr

            self.metrics_dictionary["RESULTS"][test.obs_or_model]["InputClimatologyFileName"] = \
                os.path.basename(test.file_path())
            self.metrics_dictionary["RESULTS"][
                test.obs_or_model]["InputClimatologyMD5"] = test.hash()
            # Not just global
            # TODO Ask Charles if the below check is needed
            # if len(self.regions_dict[self.var]) > 1:
            self.metrics_dictionary["RESULTS"][test.obs_or_model][
                "InputRegionFileName"] = \
                self.sftlf[test.obs_or_model]["filename"]
            self.metrics_dictionary["RESULTS"][test.obs_or_model][
                "InputRegionMD5"] = \
                self.sftlf[test.obs_or_model]["md5"]

    def output_interpolated_model_climatologies(self, test, test_data):
        ''' Save the netCDF file. '''
        region_name = self.get_region_name_from_region(test.region)
        pth = os.path.join(self.parameter.test_clims_interpolated_output,
                           region_name)
        clim_file = Base(pth, self.parameter.filename_output_template)
        logging.getLogger("pcmdi_metrics").info(
            'Saving interpolated climatologies to: %s' % clim_file())
        clim_file.level = self.out_file.level
        clim_file.model_version = test.obs_or_model

        clim_file.table = self.table_realm
        clim_file.period = self.parameter.period
        clim_file.case_id = self.parameter.case_id
        clim_file.set_target_grid(self.parameter.target_grid, self.regrid_tool,
                                  self.regrid_method)
        clim_file.variable = self.var
        clim_file.region = region_name
        clim_file.realization = self.parameter.realization
        DataSet.apply_custom_keys(clim_file, self.parameter.custom_keys,
                                  self.var)
        clim_file.write(test_data, type="nc", id=self.var)

    def get_region_name_from_region(self, region):
        ''' Extract the region name from the region dict. '''
        # region is both in ref and test
        if region is None:
            return 'global'
        return region['id']

    def check_save_test_clim(self, ref):
        ''' Check whether the interpolated netCDF climatologies need to be saved. '''
        # Since we are only saving once per reference data set (it's always
        # the same after), we need to check if ref is the first value from the
        # parameter, hence we have ref.obs_or_model == reference_data_set[0]
        reference_data_set = self.parameter.reference_data_set
        reference_data_set = Observation.setup_obs_list_from_parameter(
            reference_data_set, self.obs_dict, self.var)
        return not self.parameter.dry_run and hasattr(self.parameter, 'save_test_clims') \
               and self.parameter.save_test_clims is True and ref.obs_or_model == reference_data_set[0]  # noqa

    def write_on_exit(self):
        ''' Output the metrics_dictionary as a json and text file. '''
        self.setup_out_file()
        self.metrics_dictionary['METRICS'] = self.metrics_def_dictionary
        if len(self.metrics_def_dictionary) == 0:
            raise RuntimeError("No results generated, cannot write to file")
        if not self.parameter.dry_run:
            logging.getLogger("pcmdi_metrics").info('Saving results to: %s' %
                                                    self.out_file())
            self.out_file.write(self.metrics_dictionary,
                                json_structure=[
                                    "model", "reference", "rip", "region",
                                    "statistic", "season"
                                ],
                                indent=4,
                                separators=(',', ': '),
                                mode="r+")
Example #7
    def create_sftlf(parameter):
        """Create the sftlf file from the parameter."""
        sftlf = {}

        for test in parameter.test_data_set:
            tmp_name = getattr(parameter, "sftlf_filename_template")
            if tmp_name is None:  # Not defined from commandline or param file
                tmp_name = parameter.filename_template
            sft = Base(parameter.test_data_path, tmp_name)
            sft.model_version = test
            sft.table = "fx"
            sft.realm = "atmos"
            sft.period = getattr(parameter, "period", "")
            sft.ext = "nc"
            sft.case_id = getattr(parameter, "case_id", "")
            sft.target_grid = None
            sft.realization = "r0i0p0"
            DataSet.apply_custom_keys(sft, parameter.custom_keys, "sftlf")
            try:
                sftlf[test] = {"raw": sft.get("sftlf")}
                sftlf[test]["filename"] = os.path.basename(sft())
                sftlf[test]["md5"] = sft.hash()
            except Exception:
                sftlf[test] = {"raw": None}
                sftlf[test]["filename"] = None
                sftlf[test]["md5"] = None
        if parameter.target_grid == "2.5x2.5":
            t_grid = cdms2.createUniformGrid(-88.875, 72, 2.5, 0, 144, 2.5)
        else:
            t_grid = parameter.target_grid

        sft = cdutil.generateLandSeaMask(t_grid)
        sft[:] = sft.filled(1.0) * 100.0
        sftlf["target_grid"] = sft

        return sftlf
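
The 2.5x2.5 branch above is small enough to run on its own. A sketch, assuming cdms2 and cdutil are installed; the variable names are local to the sketch:

import cdms2
import cdutil

# Same calls as the 2.5x2.5 branch above: a 72x144 uniform global grid and a
# land/sea mask on it, rescaled to percent land.
t_grid = cdms2.createUniformGrid(-88.875, 72, 2.5, 0, 144, 2.5)
sft = cdutil.generateLandSeaMask(t_grid)
sft[:] = sft.filled(1.0) * 100.0
print(sft.shape)  # expected: (72, 144)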
Example #8
class Model(pcmdi_metrics.driver.dataset.DataSet):
    """Handles all the computation (setting masking, target grid, etc)
    and some file I/O related to models."""
    def __init__(self, parameter, var_name_long, region, model, obs_dict,
                 data_path, sftlf):
        super(Model, self).__init__(parameter, var_name_long, region, obs_dict,
                                    data_path, sftlf)
        logging.getLogger("pcmdi_metrics").setLevel(LOG_LEVEL)

        self._model_file = None
        self.var_in_file = None
        self.obs_or_model = model
        self.create_model_file()
        self.setup_target_grid(self._model_file)
        self.setup_target_mask()

    def create_model_file(self):
        """Creates an object that will eventually output the netCDF file."""
        self._model_file = Base(self.data_path,
                                self.parameter.filename_template)
        self._model_file.variable = self.var
        self._model_file.model_version = self.obs_or_model
        self._model_file.period = self.parameter.period
        self._model_file.ext = "nc"
        self._model_file.case_id = self.parameter.case_id
        self._model_file.realization = self.parameter.realization
        self.apply_custom_keys(self._model_file, self.parameter.custom_keys,
                               self.var)

    def setup_target_mask(self):
        """Sets the mask and target_mask attribute of self._model_file"""
        self.var_in_file = self.get_var_in_file()

        if self.region is not None:
            region_value = self.region.get("value", None)
            if region_value is not None:
                if self.sftlf[self.obs_or_model]["raw"] is None:
                    self.create_sftlf_model_raw(self.var_in_file)

                self._model_file.mask = self.sftlf[self.obs_or_model]["raw"]
                self._model_file.target_mask = MV2.not_equal(
                    self.sftlf["target_grid"], region_value)

    def get(self):
        """Gets the variable based on the region and level (if given) for
        the file from data_path, which is defined in the initializer."""
        try:
            if self.level is None:
                data_model = self._model_file.get(self.var,
                                                  var_in_file=self.var_in_file,
                                                  region=self.region)
            else:
                data_model = self._model_file.get(
                    self.var,
                    var_in_file=self.var_in_file,
                    level=self.level,
                    region=self.region,
                )

            return data_model

        except Exception as e:
            msg = "Failed to get variables %s for versions: %s, error: %s"
            logging.getLogger("pcmdi_metrics").error(
                msg % (self.var, self.obs_or_model, e))
            raise RuntimeError("Need to skip model: %s" % self.obs_or_model)

    def get_var_in_file(self):
        """Based off the model_tweaks parameter, get the variable mapping."""
        tweaks = {}
        tweaks_all = {}
        if hasattr(self.parameter, "model_tweaks"):
            tweaks = self.parameter.model_tweaks.get(self.obs_or_model, {})
            tweaks_all = self.parameter.model_tweaks.get(None, {})
        var_in_file = tweaks.get("variable_mapping", {}).get(self.var, None)

        if var_in_file is None:
            if hasattr(self.parameter, "model_tweaks"):
                tweaks_all = self.parameter.model_tweaks.get(None, {})
            var_in_file = tweaks_all.get("variable_mapping",
                                         {}).get(self.var, self.var)

        return var_in_file

    def create_sftlf_model_raw(self, var_in_file):
        """For the self.obs_or_model from the initializer, create a landSeaMask
        from cdutil for self.sftlf[self.obs_or_model]['raw'] value."""
        if (not hasattr(self.parameter, "generate_sftlf")
                or self.parameter.generate_sftlf is False):
            logging.getLogger("pcmdi_metrics").info(
                "Model %s does not have sftlf, skipping region: %s" %
                (self.obs_or_model, self.region))
            raise RuntimeError(
                "Model %s does not have sftlf, skipping region: %s" %
                (self.obs_or_model, self.region))

        else:
            logging.getLogger("pcmdi_metrics").info(
                "Auto generating sftlf for model %s" % self._model_file())
            if os.path.exists(self._model_file()):
                var_file = cdms2.open(self._model_file())
                var = var_file[var_in_file]
                n = var.rank() - 2  # Minus lat and long
                sft = cdutil.generateLandSeaMask(
                    var(*(slice(0, 1), ) * n)) * 100.0
                sft[:] = sft.filled(100.0)
                self.sftlf[self.obs_or_model]["raw"] = sft
                var_file.close()
                logging.getLogger("pcmdi_metrics").info(
                    "Auto generated sftlf for model %s" % self.obs_or_model)

    def hash(self):
        """Return a hash of the file."""
        return self._model_file.hash()

    def file_path(self):
        """Return the path of the file."""
        return self._model_file()
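
get_var_in_file() expects an optional model_tweaks parameter holding per-model and default (None-keyed) variable mappings. A pure-Python sketch of that lookup with hypothetical model and variable names:

# Pure-Python sketch of the model_tweaks lookup; names are hypothetical.
model_tweaks = {
    "MODEL-X": {"variable_mapping": {"sst": "tos"}},   # per-model override
    None: {"variable_mapping": {"pr": "precip"}},      # default for every model
}

def var_in_file(var, model):
    tweaks = model_tweaks.get(model, {})
    mapped = tweaks.get("variable_mapping", {}).get(var)
    if mapped is None:
        mapped = model_tweaks.get(None, {}).get("variable_mapping", {}).get(var, var)
    return mapped

print(var_in_file("sst", "MODEL-X"))  # tos
print(var_in_file("pr", "MODEL-Y"))   # precip
print(var_in_file("tas", "MODEL-Y"))  # tas (no mapping, falls back to the variable itself)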
Example #9
class Model(pcmdi_metrics.driver.dataset.DataSet):
    ''' Handles all the computation (setting masking, target grid, etc)
    and some file I/O related to models. '''
    def __init__(self, parameter, var_name_long, region, model, obs_dict,
                 data_path, sftlf):
        super(Model, self).__init__(parameter, var_name_long, region, obs_dict,
                                    data_path, sftlf)
        logging.basicConfig(level=logging.DEBUG)

        self._model_file = None
        self.var_in_file = None
        self.obs_or_model = model
        self.create_model_file()
        self.setup_target_grid(self._model_file)
        self.setup_target_mask()

    def create_model_file(self):
        ''' Creates an object that will eventually output the netCDF file. '''
        self._model_file = Base(self.data_path,
                                self.parameter.filename_template)
        self._model_file.variable = self.var
        self._model_file.model_version = self.obs_or_model
        self._model_file.period = self.parameter.period
        self._model_file.ext = 'nc'
        self._model_file.case_id = self.parameter.case_id
        self._model_file.realization = self.parameter.realization
        self.apply_custom_keys(self._model_file, self.parameter.custom_keys,
                               self.var)

    def setup_target_mask(self):
        ''' Sets the mask and target_mask attributes of self._model_file '''
        self.var_in_file = self.get_var_in_file()

        if self.region is not None:
            region_value = self.region.get('value', None)
            if region_value is not None:
                if self.sftlf[self.obs_or_model]['raw'] is None:
                    self.create_sftlf_model_raw(self.var_in_file)

                self._model_file.mask = self.sftlf[self.obs_or_model]['raw']
                self._model_file.target_mask = \
                    MV2.not_equal(self.sftlf['target_grid'], region_value)

    def get(self):
        ''' Gets the variable based on the region and level (if given) for
        the file from data_path, which is defined in the initializer. '''
        try:
            if self.level is None:
                data_model = self._model_file.get(self.var,
                                                  var_in_file=self.var_in_file,
                                                  region=self.region)
            else:
                data_model = self._model_file.get(self.var,
                                                  var_in_file=self.var_in_file,
                                                  level=self.level,
                                                  region=self.region)

            return data_model

        except Exception as e:
            msg = 'Failed to get variables %s for versions: %s, error: %s'
            logging.error(msg % (self.var, self.obs_or_model, e))
            raise RuntimeError('Need to skip model: %s' % self.obs_or_model)

    def get_var_in_file(self):
        ''' Based on the model_tweaks parameter, get the variable mapping. '''
        tweaks = {}
        tweaks_all = {}
        if hasattr(self.parameter, 'model_tweaks'):
            tweaks = self.parameter.model_tweaks.get(self.obs_or_model, {})
            tweaks_all = self.parameter.model_tweaks.get(None, {})
        var_in_file = tweaks.get('variable_mapping', {}).get(self.var, None)

        if var_in_file is None:
            if hasattr(self.parameter, 'model_tweaks'):
                tweaks_all = self.parameter.model_tweaks.get(None, {})
            var_in_file = tweaks_all.get('variable_mapping',
                                         {}).get(self.var, self.var)

        return var_in_file

    def create_sftlf_model_raw(self, var_in_file):
        ''' For self.obs_or_model from the initializer, generate a land/sea mask
        with cdutil and store it in self.sftlf[self.obs_or_model]['raw']. '''
        if not hasattr(self.parameter, 'generate_sftlf') or \
           self.parameter.generate_sftlf is False:
            logging.info('Model %s does not have sftlf, skipping region: %s' %
                         (self.obs_or_model, self.region))
            raise RuntimeError(
                'Model %s does not have sftlf, skipping region: %s' %
                (self.obs_or_model, self.region))

        else:
            logging.info('Auto generating sftlf for model %s' %
                         self._model_file())
            if os.path.exists(self._model_file()):
                var_file = cdms2.open(self._model_file())
                var = var_file[var_in_file]
                n = var.rank() - 2  # Minus lat and long
                sft = cdutil.generateLandSeaMask(
                    var(*(slice(0, 1), ) * n)) * 100.0
                sft[:] = sft.filled(100.0)
                self.sftlf[self.obs_or_model]['raw'] = sft
                var_file.close()
                logging.info('Auto generated sftlf for model %s' %
                             self.obs_or_model)

    def hash(self):
        ''' Return a hash of the file. '''
        return self._model_file.hash()

    def file_path(self):
        ''' Return the path of the file. '''
        return self._model_file()
Example #10
    def output_interpolated_model_climatologies(self, test, test_data):
        ''' Save the netCDF file. '''
        region_name = self.get_region_name_from_region(test.region)
        pth = os.path.join(self.parameter.test_clims_interpolated_output,
                           region_name)
        clim_file = Base(pth, self.parameter.filename_output_template)
        logging.getLogger("pcmdi_metrics").info('Saving interpolated climatologies to: %s' % clim_file())
        clim_file.level = self.out_file.level
        clim_file.model_version = test.obs_or_model

        clim_file.table = self.table_realm
        clim_file.period = self.parameter.period
        clim_file.case_id = self.parameter.case_id
        clim_file.set_target_grid(
            self.parameter.target_grid,
            self.regrid_tool,
            self.regrid_method)
        clim_file.variable = self.var
        clim_file.region = region_name
        clim_file.realization = self.parameter.realization
        DataSet.apply_custom_keys(clim_file, self.parameter.custom_keys, self.var)
        clim_file.write(test_data, type="nc", id=self.var)
Example #11
class OutputMetrics(object):

    def __init__(self, parameter, var_name_long, obs_dict, sftlf):
        logging.getLogger("pcmdi_metrics").setLevel(LOG_LEVEL)
        self.parameter = parameter
        self.var_name_long = var_name_long
        self.obs_dict = obs_dict
        self.var = var_name_long.split('_')[0]
        self.sftlf = sftlf

        self.metrics_def_dictionary = {}
        self.metrics_dictionary = {}

        self.out_file = Base(self.parameter.metrics_output_path, self.parameter.output_json_template)

        self.regrid_method = ''
        self.regrid_tool = ''
        self.table_realm = ''
        self.realm = ''
        self.setup_regrid_and_realm_vars()
        self.setup_out_file()
        self.setup_metrics_dictionary()

    def setup_metrics_dictionary(self):
        ''' Initialize the results dict (metrics_dictionary) and the metrics documentation
        dict (metrics_def_dictionary) which is put in the results dict. '''
        self.metrics_def_dictionary = collections.OrderedDict()
        self.metrics_dictionary = collections.OrderedDict()
        self.metrics_dictionary["DISCLAIMER"] = self.open_disclaimer()
        if self.parameter.user_notes is not None:
            self.metrics_dictionary["USER_NOTES"] = self.parameter.user_notes
        self.metrics_dictionary["RESULTS"] = collections.OrderedDict()

        self.metrics_dictionary["Variable"] = {}
        self.metrics_dictionary["Variable"]["id"] = self.var
        self.metrics_dictionary["json_version"] = '3.0'
        self.metrics_dictionary["References"] = {}
        self.metrics_dictionary["RegionalMasking"] = {}

        level = DataSet.calculate_level_from_var(self.var_name_long)
        if level is None:
            self.out_file.level = ''
        else:
            self.metrics_dictionary["Variable"]["level"] = level
            self.out_file.level = "-%i" % (int(level / 100.0))

    def open_disclaimer(self):
        ''' Return the contents of disclaimer.txt. '''
        f = DataSet.load_path_as_file_obj('disclaimer.txt')
        contents = f.read()
        f.close()
        return contents

    def setup_regrid_and_realm_vars(self):
        ''' Set the regrid_method, regrid_tool, table_realm,
        and realm based on the obs dict and var. '''
        if DataSet.use_omon(self.obs_dict, self.var):
            self.regrid_method = self.parameter.regrid_method_ocn
            self.regrid_tool = self.parameter.regrid_tool_ocn
            self.table_realm = 'Omon'
            self.realm = "ocn"
        else:
            self.regrid_method = self.parameter.regrid_method
            self.regrid_tool = self.parameter.regrid_tool
            self.table_realm = 'Amon'
            self.realm = "atm"

    def setup_out_file(self):
        ''' Setup for the out_file, which outputs both the .json and .txt. '''
        self.out_file.set_target_grid(
            self.parameter.target_grid, self.regrid_tool, self.regrid_method)
        self.out_file.variable = self.var
        self.out_file.realm = self.realm
        self.out_file.table = self.table_realm
        self.out_file.case_id = self.parameter.case_id
        DataSet.apply_custom_keys(self.out_file, self.parameter.custom_keys, self.var)

    def add_region(self, region):
        ''' Add a region to the metrics_dictionary. '''
        self.metrics_dictionary['RegionalMasking'][self.get_region_name_from_region(region)] = region

    def calculate_and_output_metrics(self, ref, test):
        ''' Given ref and test (each either an Observation or a Model), compute the metrics. '''
        if isinstance(self.obs_dict[self.var][ref.obs_or_model], basestring):
            self.obs_var_ref = self.obs_dict[self.var][self.obs_dict[self.var][ref.obs_or_model]]
        else:
            self.obs_var_ref = self.obs_dict[self.var][ref.obs_or_model]

        self.metrics_dictionary['References'][ref.obs_or_model] = self.obs_var_ref

        ref_data = None  # ensure the name is defined if ref() raises
        try:
            ref_data = ref()
        except Exception as e:
            msg = 'Error while processing observation %s for variables %s:\n\t%s'
            logging.getLogger("pcmdi_metrics").error(msg % (ref.obs_or_model, self.var, str(e)))

        if ref_data is None:  # Something went bad!
            raise RuntimeError('Could not load reference {}'.format(ref.obs_or_model))

        try:
            test_data = test()
        except RuntimeError:
            # THIS EXCEPTION IS RAISED TO BREAK OUT OF THE FOR LOOP IN PCMDI_DRIVER
            # THIS SHOULD BE A CUSTOM EXCEPTION (PrematureBreakError)
            raise RuntimeError('Need to skip model: %s' % test.obs_or_model)

        # Todo: Make this a fcn
        self.set_grid_in_metrics_dictionary(test_data)

        if ref_data.shape != test_data.shape:
            raise RuntimeError('Two data sets have different shapes. %s vs %s' % (ref_data.shape, test_data.shape))

        self.set_simulation_desc(test, test_data)

        if ref.obs_or_model not in self.metrics_dictionary['RESULTS'][test.obs_or_model]:
            self.metrics_dictionary["RESULTS"][test.obs_or_model][ref.obs_or_model] = \
                {'source': self.obs_dict[self.var][ref.obs_or_model]}

        parameter_realization = self.metrics_dictionary["RESULTS"][test.obs_or_model][ref.obs_or_model].\
            get(self.parameter.realization, {})

        if not self.parameter.dry_run:
            pr_rgn = pcmdi_metrics.pcmdi.compute_metrics(self.var_name_long, test_data, ref_data)

            # Calling compute_metrics with None for the model and obs returns
            # the definitions.
            self.metrics_def_dictionary.update(
                pcmdi_metrics.pcmdi.compute_metrics(self.var_name_long, None, None))
            if hasattr(self.parameter, 'compute_custom_metrics'):
                pr_rgn.update(
                    self.parameter.compute_custom_metrics(self.var_name_long,
                                                          test_data, ref_data))
                try:
                    self.metrics_def_dictionary.update(
                        self.parameter.compute_custom_metrics(
                            self.var_name_long, None, None))
                except Exception:
                    self.metrics_def_dictionary.update(
                        {'custom': self.parameter.compute_custom_metrics.__doc__})

            parameter_realization[self.get_region_name_from_region(ref.region)] = collections.OrderedDict(
                (k, pr_rgn[k]) for k in sorted(pr_rgn.keys())
            )

            self.metrics_dictionary['RESULTS'][test.obs_or_model][ref.obs_or_model][self.parameter.realization] = \
                parameter_realization

        if self.check_save_test_clim(ref):
            self.output_interpolated_model_climatologies(test, test_data)

        self.write_on_exit()

    def set_grid_in_metrics_dictionary(self, test_data):
        ''' Set the grid in metrics_dictionary. '''
        grid = {}
        grid['RegridMethod'] = self.regrid_method
        grid['RegridTool'] = self.regrid_tool
        grid['GridName'] = self.parameter.target_grid
        grid['GridResolution'] = test_data.shape[1:]
        self.metrics_dictionary['GridInfo'] = grid

    def set_simulation_desc(self, test, test_data):
        ''' Fill out information for the output .json and .txt files. '''
        self.metrics_dictionary["RESULTS"][test.obs_or_model] = \
            self.metrics_dictionary["RESULTS"].get(test.obs_or_model, {})
        if "SimulationDescription" not in \
                self.metrics_dictionary["RESULTS"][test.obs_or_model]:

            descr = {"MIPTable": self.obs_var_ref["CMIP_CMOR_TABLE"],
                     "Model": test.obs_or_model,
                     }
            sim_descr_mapping = {
                "ModelActivity": "project_id",
                "ModellingGroup": "institute_id",
                "Experiment": "experiment",
                "ModelFreeSpace": "ModelFreeSpace",
                "Realization": "realization",
                "creation_date": "creation_date",
            }
            sim_descr_mapping.update(
                getattr(self.parameter, "simulation_description_mapping", {}))

            for att in list(sim_descr_mapping.keys()):
                nm = sim_descr_mapping[att]
                if not isinstance(nm, (list, tuple)):
                    nm = ["%s", nm]
                fmt = nm[0]
                vals = []
                for a in nm[1:]:
                    # First trying from parameter file
                    if hasattr(self.parameter, a):
                        vals.append(getattr(self.parameter, a))
                    # Now fall back on file...
                    else:
                        f = cdms2.open(test.file_path())
                        if hasattr(f, a):
                            try:
                                vals.append(float(getattr(f, a)))
                            except Exception:
                                vals.append(getattr(f, a))
                        # Ok couldn't find it anywhere
                        # setting to N/A
                        else:
                            vals.append("N/A")
                        f.close()
                descr[att] = fmt % tuple(vals)

            self.metrics_dictionary["RESULTS"][test.obs_or_model]["units"] = \
                getattr(test_data, "units", "N/A")
            self.metrics_dictionary["RESULTS"][test.obs_or_model]["SimulationDescription"] = descr

            self.metrics_dictionary["RESULTS"][test.obs_or_model]["InputClimatologyFileName"] = \
                os.path.basename(test.file_path())
            self.metrics_dictionary["RESULTS"][test.obs_or_model]["InputClimatologyMD5"] = test.hash()
            # Not just global
            # TODO Ask Charles if the below check is needed
            # if len(self.regions_dict[self.var]) > 1:
            self.metrics_dictionary["RESULTS"][test.obs_or_model][
                "InputRegionFileName"] = \
                self.sftlf[test.obs_or_model]["filename"]
            self.metrics_dictionary["RESULTS"][test.obs_or_model][
                "InputRegionMD5"] = \
                self.sftlf[test.obs_or_model]["md5"]

    def output_interpolated_model_climatologies(self, test, test_data):
        ''' Save the netCDF file. '''
        region_name = self.get_region_name_from_region(test.region)
        pth = os.path.join(self.parameter.test_clims_interpolated_output,
                           region_name)
        clim_file = Base(pth, self.parameter.filename_output_template)
        logging.getLogger("pcmdi_metrics").info('Saving interpolated climatologies to: %s' % clim_file())
        clim_file.level = self.out_file.level
        clim_file.model_version = test.obs_or_model

        clim_file.table = self.table_realm
        clim_file.period = self.parameter.period
        clim_file.case_id = self.parameter.case_id
        clim_file.set_target_grid(
            self.parameter.target_grid,
            self.regrid_tool,
            self.regrid_method)
        clim_file.variable = self.var
        clim_file.region = region_name
        clim_file.realization = self.parameter.realization
        DataSet.apply_custom_keys(clim_file, self.parameter.custom_keys, self.var)
        clim_file.write(test_data, type="nc", id=self.var)

    def get_region_name_from_region(self, region):
        ''' Extract the region name from the region dict. '''
        # region is both in ref and test
        if region is None:
            return 'global'
        return region['id']

    def check_save_test_clim(self, ref):
        ''' Check whether the interpolated netCDF climatologies need to be saved. '''
        # Since we are only saving once per reference data set (it's always
        # the same after), we need to check if ref is the first value from the
        # parameter, hence we have ref.obs_or_model == reference_data_set[0]
        reference_data_set = self.parameter.reference_data_set
        reference_data_set = Observation.setup_obs_list_from_parameter(
            reference_data_set, self.obs_dict, self.var)
        return not self.parameter.dry_run and hasattr(self.parameter, 'save_test_clims') \
               and self.parameter.save_test_clims is True and ref.obs_or_model == reference_data_set[0]  # noqa

    def write_on_exit(self):
        ''' Output the metrics_dictionary as a json and text file. '''
        self.metrics_dictionary['METRICS'] = self.metrics_def_dictionary
        if len(self.metrics_def_dictionary) == 0:
            raise RuntimeError("No results generated, cannot write to file")
        if not self.parameter.dry_run:
            logging.getLogger("pcmdi_metrics").info('Saving results to: %s' % self.out_file())
            self.out_file.write(self.metrics_dictionary,
                                json_structure=["model", "reference", "rip", "region", "statistic", "season"],
                                indent=4,
                                separators=(',', ': '))
            self.out_file.write(self.metrics_dictionary, type='txt')
Example #12
    def create_sftlf(parameter):
        ''' Create the sftlf file from the parameter. '''
        sftlf = {}

        for test in parameter.test_data_set:
            tmp_name = getattr(parameter, "sftlf_filename_template")
            if tmp_name is None:  # Not defined from commandline or param file
                tmp_name = parameter.filename_template
            sft = Base(parameter.test_data_path, tmp_name)
            sft.model_version = test
            sft.table = "fx"
            sft.realm = "atmos"
            sft.period = getattr(parameter, 'period', '')
            sft.ext = "nc"
            sft.case_id = getattr(parameter, 'case_id', '')
            sft.target_grid = None
            sft.realization = "r0i0p0"
            DataSet.apply_custom_keys(sft, parameter.custom_keys, "sftlf")
            try:
                sftlf[test] = {"raw": sft.get("sftlf")}
                sftlf[test]["filename"] = os.path.basename(sft())
                sftlf[test]["md5"] = sft.hash()
            except Exception:
                sftlf[test] = {"raw": None}
                sftlf[test]["filename"] = None
                sftlf[test]["md5"] = None
        if parameter.target_grid == "2.5x2.5":
            t_grid = cdms2.createUniformGrid(-88.875, 72, 2.5, 0, 144, 2.5)
        else:
            t_grid = parameter.target_grid

        sft = cdutil.generateLandSeaMask(t_grid)
        sft[:] = sft.filled(1.0) * 100.0
        sftlf["target_grid"] = sft

        return sftlf