def test_some_lookups(self):
    """Check source_id <-> institution_id lookups in the CMIP6 CV, both ways."""
    cv = cmip6.CMIP6_CVs()

    # A model (source_id) resolves to the institution that runs it.
    institution = cv.lookup('NorCPM1', 'source_id', 'institution_id')
    self.assertEqual('NCC', institution)

    # The reverse lookup yields every model of that institution; ordering is
    # not significant, hence assertCountEqual.
    expected_models = [
        'NorCPM1', 'NorESM2-LMEC', 'NorESM2-HH', 'NorESM1-F',
        'NorESM2-MH', 'NorESM2-LM', 'NorESM2-MM', 'NorESM2-LME',
    ]
    models = cv.lookup('NCC', 'institution_id', 'source_id')
    self.assertCountEqual(expected_models, models)
def test_table_id_lookup(self):
    """Check table_id <-> frequency / table_freq lookups for monthly data."""
    cv = cmip6.CMIP6_CVs()

    # Every MIP table expected to be reported at monthly frequency.
    monthly_tables = {
        'EmonZ', 'AERmon', 'SImon', 'Amon', 'CFmon', 'Omon', 'ImonGre',
        'Emon', 'ImonAnt', 'Lmon', 'LImon', 'Oclim', 'AERmonZ',
    }

    # table -> frequency, as a date-frequency object and as the raw string
    self.assertEqual(
        cv.lookup('AERmon', 'table_id', 'frequency'), {dt_freq('mon')}
    )
    self.assertEqual(cv.lookup('AERmon', 'table_id', 'table_freq'), {'mon'})

    # frequency -> tables, again for both representations of "monthly"
    self.assertEqual(
        cv.lookup(dt_freq('mon'), 'frequency', 'table_id'), monthly_tables
    )
    self.assertEqual(
        cv.lookup('mon', 'table_freq', 'table_id'), monthly_tables
    )
def __init__(self, case_dict, DateFreqMixin=None):
    """Resolve the CMIP6 DRS identifiers for a case and set its root directory.

    The experiment, activity, source (model), institution and member ids are
    taken from ``case_dict`` when supplied; activity and institution are
    otherwise looked up in the CMIP6 controlled vocabulary, and the member id
    defaults to ``'r1i1p1f1'``. The resulting path is stored in
    ``case_dict['CASE_ROOT_DIR']`` (``case_dict`` is modified in place) before
    the parent initializer runs.

    Args:
        case_dict: Case settings. Needs ``experiment_id`` (or ``experiment``)
            and ``source_id`` (or ``model``); may also carry ``activity_id``,
            ``institution_id`` and ``member_id``.
        DateFreqMixin: Accepted for signature compatibility only; the parent
            initializer is always given ``cmip6.CMIP6DateFrequency``.

    Raises:
        Exception: If the experiment or the model/source cannot be determined
            from ``case_dict``.
    """
    cmip = cmip6.CMIP6_CVs()

    # The experiment is always needed to build the directory path, so resolve
    # it whether or not the caller also supplied activity_id.
    if 'experiment_id' in case_dict:
        self.experiment_id = case_dict['experiment_id']
    elif 'experiment' in case_dict:
        self.experiment_id = case_dict['experiment']
    else:
        raise Exception("Can't determine experiment.")
    if 'activity_id' in case_dict:
        # Previously left unset in this case, which made the os.path.join
        # below fail with AttributeError.
        self.activity_id = case_dict['activity_id']
    else:
        self.activity_id = cmip.lookup(
            self.experiment_id, 'experiment_id', 'activity_id')

    # Same reasoning for the model and the institution that runs it.
    if 'source_id' in case_dict:
        self.source_id = case_dict['source_id']
    elif 'model' in case_dict:
        self.source_id = case_dict['model']
    else:
        raise Exception("Can't determine model/source.")
    if 'institution_id' in case_dict:
        self.institution_id = case_dict['institution_id']
    else:
        self.institution_id = cmip.lookup(
            self.source_id, 'source_id', 'institution_id')

    # Realization code: honor a caller-supplied value, else use the default.
    self.member_id = case_dict.get('member_id', 'r1i1p1f1')

    # NOTE(review): cmip.lookup appears able to return a collection when the
    # mapping is not one-to-one; os.path.join would reject that -- confirm.
    case_dict['CASE_ROOT_DIR'] = os.path.join(
        self._cmip6_root, self.activity_id, self.institution_id,
        self.source_id, self.experiment_id, self.member_id)

    # assign explicitly else linter complains
    self.data_freq = None
    self.table_id = None
    self.grid_label = None
    self.version_date = None
    super(Gfdlcmip6abcDataManager, self).__init__(
        case_dict, DateFreqMixin=cmip6.CMIP6DateFrequency)

    # NOTE(review): data_freq was assigned above, so this membership test is
    # true unless the parent initializer removes the attribute; confirm
    # whether a check against None was intended.
    if 'data_freq' in self.__dict__:
        self.table_id = cmip.table_id_from_freq(self.data_freq)
def test_is_in_cv(self):
    """Check that a known table name is reported as a member of 'table_id'."""
    cv = cmip6.CMIP6_CVs()
    found = cv.is_in_cv('table_id', 'IyrGre')
    self.assertTrue(found)
def __post_init__(self, log=_log, model=None, experiment=None):
    """Validate the CMIP6 attributes and choose the directory to crawl.

    Steps, in order: fall back to the globally configured CASE_ROOT_DIR,
    check that directory exists, apply the ``model`` / ``experiment``
    synonyms, check non-empty fields against the CMIP6 controlled
    vocabulary, infer missing leading DRS fields, and set CATALOG_DIR.

    Args:
        log: Logger that receives all diagnostic messages.
        model: Synonym for ``source_id``; used only if that field is empty.
        experiment: Synonym for ``experiment_id``; used only if that field
            is empty.
    """
    super(CMIP6DataSourceAttributes, self).__post_init__(log=log)
    config = core.ConfigManager()
    cv = cmip6.CMIP6_CVs()

    def _fill_from(known, wanted):
        # Populate attribute `wanted` from attribute `known` through a
        # single-valued CV lookup; never overwrites a non-empty value.
        if getattr(self, wanted, ""):
            return
        try:
            known_val = getattr(self, known, "")
            if not known_val:
                # Nothing to look up: reuse the failure path below.
                raise KeyError()
            wanted_val = cv.lookup_single(known_val, known, wanted)
            log.debug("Set %s='%s' based on %s='%s'.",
                      wanted, wanted_val, known, known_val)
            setattr(self, wanted, wanted_val)
        except KeyError:
            log.debug("Couldn't set %s from %s='%s'.",
                      wanted, known, known_val)
            setattr(self, wanted, "")

    # Fall back to the globally configured root when none was given.
    if config.CASE_ROOT_DIR and not self.CASE_ROOT_DIR:
        log.debug("Using global CASE_ROOT_DIR = '%s'.", config.CASE_ROOT_DIR)
        self.CASE_ROOT_DIR = config.CASE_ROOT_DIR

    # verify case root dir exists
    root_exists = os.path.isdir(self.CASE_ROOT_DIR)
    if not root_exists:
        log.critical("Data directory CASE_ROOT_DIR = '%s' not found.",
                     self.CASE_ROOT_DIR)
        util.exit_handler(code=1)

    # should really fix this at the level of CLI flag synonyms
    if model and not self.source_id:
        self.source_id = model
    if experiment and not self.experiment_id:
        self.experiment_id = experiment

    # validate non-empty field values
    unrecognized_msg = (
        "Supplied value '%s' for '%s' is not recognized by "
        "the CMIP6 CV. Continuing, but queries will probably fail."
    )
    for fld in dataclasses.fields(self):
        supplied = getattr(self, fld.name, "")
        if supplied:
            try:
                recognized = cv.is_in_cv(fld.name, supplied)
            except KeyError:
                # raised if not a valid CMIP6 CV category
                continue
            if not recognized:
                log.error(unrecognized_msg, supplied, fld.name)

    # currently no inter-field consistency checks: happens implicitly, since
    # set_experiment will find zero experiments.

    # Attempt to determine first few fields of DRS, to avoid having to crawl
    # entire DRS structure
    _fill_from('experiment_id', 'activity_id')
    _fill_from('source_id', 'institution_id')
    _fill_from('institution_id', 'source_id')
    # TODO: multi-column lookups

    # set CATALOG_DIR to be further down the hierarchy if possible, to
    # avoid having to crawl entire DRS structure; CASE_ROOT_DIR remains the
    # root of the DRS hierarchy
    candidate = self.CASE_ROOT_DIR
    for attr_name in ("activity_id", "institution_id", "source_id",
                      "experiment_id"):
        component = getattr(self, attr_name, "")
        if not component:
            # Stop at the first unknown level; deeper ones can't be joined.
            break
        candidate = os.path.join(candidate, component)

    if os.path.isdir(candidate):
        self.CATALOG_DIR = candidate
    else:
        log.error("Data directory '%s' not found; starting crawl at '%s'.",
                  candidate, self.CASE_ROOT_DIR)
        self.CATALOG_DIR = self.CASE_ROOT_DIR