def framework_verify(code_root, run_output):
    """Verify that the framework's html output links to no missing files.

    Exits with code 1 (via util.exit_handler) if any linked files are
    missing; aborts via fatal_exception_handler if verification itself fails.
    code_root is unused here but kept for interface consistency with the
    other verification entry points.
    """
    print("Checking linked output files")
    try:
        index_path = os.path.join(run_output, 'index.html')
        if not os.path.exists(index_path):
            raise IOError(
                "Can't find framework html output in {}".format(index_path))
        verifier = LinkVerifier(index_path, verbose=False)
        missing = verifier.verify_all_links()
    except Exception as exc:
        fatal_exception_handler(exc, "ERROR in link verification.")
    if missing:
        print("ERROR: the following files are missing:")
        print(util.pretty_print_json(missing))
        util.exit_handler(code=1)
    print("SUCCESS: no missing links found.")
    print("Finished: framework test run successful!")
def parse_pod_list(self, pod_list, pod_info_tuple):
    """Resolve the user's POD selection into a list of POD names.

    Accepts individual POD names, realm names, and the keywords
    'all'/'example'/'examples'. Exits with code 1 on unrecognized
    identifiers or if the resulting selection is empty.
    """
    pod_data = pod_info_tuple.pod_data  # pod names -> contents of settings file
    selectors = util.to_iter(pod_list, set)
    unrecognized = []
    selected = []
    for sel in selectors:
        if sel == 'all':
            # every POD except the example PODs
            selected.extend(p for p in pod_data if not p.startswith('example'))
        elif sel in ('example', 'examples'):
            # only the example PODs
            selected.extend(p for p in pod_data if p.startswith('example'))
        elif sel in pod_info_tuple.realm_data:
            # realm_data: realm name -> list of POD names; add all for realm
            selected.extend(pod_info_tuple.realm_data[sel])
        elif sel in pod_data:
            # a single POD, selected by name
            selected.append(sel)
        else:
            _log.error("POD identifier '%s' not recognized.", sel)
            unrecognized.append(sel)
    if unrecognized:
        valid = ['all', 'examples'] \
            + pod_info_tuple.sorted_realms \
            + pod_info_tuple.sorted_pods
        _log.critical((
            "The following POD identifiers were not recognized: "
            "[%s].\nRecognized identifiers are: [%s].\n(Received --pods = %s)."
        ),
            ', '.join(f"'{p}'" for p in unrecognized),
            ', '.join(f"'{p}'" for p in valid),
            str(list(selectors))
        )
        util.exit_handler(code=1)
    selected = list(set(selected))  # delete duplicates
    if not selected:
        _log.critical((
            "ERROR: no PODs selected to be run. Do `./mdtf info pods`"
            " for a list of available PODs, and check your -p/--pods argument."
            f"\nReceived --pods = {str(list(selectors))}"))
        util.exit_handler(code=1)
    return selected
def verify_paths(self, config, p):
    """Verify framework directories exist, creating those that may be created.

    Needs to be here, instead of PathManager, because we subclass it in
    NOAA_GFDL. Also clears out WORKING_DIR unless temp files are kept.
    """
    keep_temp = config.get('keep_temp', False)
    # clean out WORKING_DIR if we're not keeping temp files:
    if os.path.exists(p.WORKING_DIR) \
            and not (keep_temp or p.WORKING_DIR == p.OUTPUT_DIR):
        shutil.rmtree(p.WORKING_DIR)
    # (attribute name on p, whether it may be created if absent)
    dirs_to_check = (
        ('CODE_ROOT', False),
        ('OBS_DATA_ROOT', False),
        ('MODEL_DATA_ROOT', True),
        ('WORKING_DIR', True)
    )
    try:
        for dir_name, create_ in dirs_to_check:
            util.check_dir(p, dir_name, create=create_)
    except Exception as exc:
        # dir_name still names the directory that failed the check
        _log.fatal(f"Input settings for {dir_name} mis-specified (caught "
                   f"{repr(exc)}.)")
        util.exit_handler(code=1)
def _set_case_root_dir(self, log=_log):
    """Additional logic to set CASE_ROOT_DIR from MODEL_DATA_ROOT."""
    config = core.ConfigManager()
    paths = core.PathManager()
    # prefer an explicitly-set global value over the fallback
    if not self.CASE_ROOT_DIR and config.CASE_ROOT_DIR:
        log.debug("Using global CASE_ROOT_DIR = '%s'.", config.CASE_ROOT_DIR)
        self.CASE_ROOT_DIR = config.CASE_ROOT_DIR
    if not self.CASE_ROOT_DIR:
        # last resort: fall back to the framework's model data root
        fallback = getattr(paths, 'MODEL_DATA_ROOT', None)
        log.debug("Setting CASE_ROOT_DIR to MODEL_DATA_ROOT = '%s'.", fallback)
        self.CASE_ROOT_DIR = fallback
    # verify CASE_ROOT_DIR exists
    if not os.path.isdir(self.CASE_ROOT_DIR):
        log.critical("Data directory CASE_ROOT_DIR = '%s' not found.",
                     self.CASE_ROOT_DIR)
        util.exit_handler(code=1)
def __post_init__(self, log=_log):
    """Validate user input."""
    super(ExplicitFileDataAttributes, self).__post_init__(log=log)
    config = core.ConfigManager()
    # fall back to the globally-configured config file if none given directly
    self.config_file = self.config_file or config.get('config_file', '')
    if not self.config_file:
        log.critical(
            ("No configuration file found for ExplicitFileDataSource "
             "(--config-file)."))
        util.exit_handler(code=1)
    # this data source forces the no-translation naming convention
    if self.convention != core._NO_TRANSLATION_CONVENTION:
        log.debug("Received incompatible convention '%s'; setting to '%s'.",
                  self.convention, core._NO_TRANSLATION_CONVENTION)
        self.convention = core._NO_TRANSLATION_CONVENTION
def __post_init__(self, log=_log):
    """Validate user input."""
    super(SampleDataAttributes, self).__post_init__(log=log)
    # default sample_dataset to CASENAME when not supplied
    if not self.sample_dataset and self.CASENAME:
        log.debug("'sample_dataset' not supplied, using CASENAME = '%s'.",
                  self.CASENAME)
        self.sample_dataset = self.CASENAME
    # verify chosen subdirectory exists
    sample_dir = os.path.join(self.CASE_ROOT_DIR, self.sample_dataset)
    if not os.path.isdir(sample_dir):
        log.critical("Sample dataset '%s' not found in CASE_ROOT_DIR = '%s'.",
                     self.sample_dataset, self.CASE_ROOT_DIR)
        util.exit_handler(code=1)
def untar_data(ftp_data, install_config):
    """Extract tar files of obs/model data and move contents to correct
    location.
    """
    if platform.system() == 'Darwin':
        # workaround for macos: extract via Archive Utility instead of tar;
        # try the modern location first, then the pre-El Capitan one
        tar_cmd = None
        for app_path in (
            "/System/Library/CoreServices/Applications/Archive Utility.app",
            # Location on Yosemite and earlier
            "/System/Library/CoreServices/Archive Utility.app"
        ):
            if os.path.exists(app_path):
                tar_cmd = 'open -W -g -j -a "{}" '.format(app_path)
                break
        if tar_cmd is None:
            print("ERROR: could not find Archive Utility.app.")
            util.exit_handler(code=1)
    else:
        tar_cmd = 'tar -xf '
    for f in ftp_data.values():
        print("Extracting {}".format(f.file))
        cwd = install_config[f.target_dir]
        top_subdir = f.contents_subdir.split(os.sep)[0]
        try:
            _ = shell_command_wrapper(tar_cmd + f.file, cwd=cwd)
        except Exception as exc:
            fatal_exception_handler(
                exc, "ERROR: could not extract {}.".format(f.file))
        try:
            # flatten: move archive contents up into cwd, then remove the
            # now-redundant top-level directory left by extraction
            for entry in os.listdir(os.path.join(cwd, f.contents_subdir)):
                shutil.move(os.path.join(cwd, f.contents_subdir, entry),
                            os.path.join(cwd, entry))
            shutil.rmtree(os.path.join(cwd, top_subdir))
        except Exception as exc:
            fatal_exception_handler(
                exc, "ERROR: could not move contents of {}.".format(f.file))
        try:
            os.remove(os.path.join(cwd, f.file))
        except Exception as exc:
            fatal_exception_handler(
                exc, "ERROR: could not delete {}.".format(f.file))
def __init__(self, cli_obj):
    """Initialize the framework from parsed CLI input; any configuration
    failure is logged with a traceback and terminates the run."""
    super(MDTFFramework, self).__init__(
        name=self.__class__.__name__, _parent=None,
        status=ObjectStatus.ACTIVE
    )
    self.code_root = cli_obj.code_root
    self.pod_list = []
    self.cases = {}
    self.global_env_vars = {}
    try:
        # load pod data
        pod_info_tuple = mdtf_info.load_pod_settings(self.code_root)
        # load log config
        log_config = cli.read_config_file(self.code_root, "logging.jsonc",
                                          site=cli_obj.site)
        self.configure(cli_obj, pod_info_tuple, log_config)
    except Exception as exc:
        # capture the full traceback before logging and exiting
        tb = traceback.TracebackException(*(sys.exc_info()))
        _log.critical("Framework caught exception %r", exc)
        print(''.join(tb.format()))
        util.exit_handler(code=1)
def verify_paths(self, config, p):
    """GFDL-site variant of verify_paths: checks the remote obs data
    directory too, and creates OUTPUT_DIR via make_remote_dir when needed."""
    keep_temp = config.get('keep_temp', False)
    # clean out WORKING_DIR if we're not keeping temp files:
    if os.path.exists(p.WORKING_DIR) \
            and not (keep_temp or p.WORKING_DIR == p.OUTPUT_DIR):
        gfdl_util.rmtree_wrapper(p.WORKING_DIR)
    # (attribute name on p, whether it may be created if absent)
    dirs_to_check = (
        ('CODE_ROOT', False),
        ('OBS_DATA_REMOTE', False),
        ('OBS_DATA_ROOT', True),
        ('MODEL_DATA_ROOT', True),
        ('WORKING_DIR', True)
    )
    try:
        for dir_name, create_ in dirs_to_check:
            util.check_dir(p, dir_name, create=create_)
    except Exception as exc:
        # dir_name still names the directory that failed the check
        _log.fatal(f"Input settings for {dir_name} mis-specified (caught "
                   f"{repr(exc)}.)")
        util.exit_handler(code=1)
    # Use GCP to create OUTPUT_DIR on a volume that may be read-only
    if not os.path.exists(p.OUTPUT_DIR):
        gfdl_util.make_remote_dir(p.OUTPUT_DIR, self.timeout, self.dry_run,
                                  log=_log)
def parse_flags(self, cli_obj):
    """Apply side effects of boolean CLI flags and pre-validate CASE_ROOT_DIR."""
    if cli_obj.config.get('dry_run', False):
        cli_obj.config['test_mode'] = True
    if cli_obj.config.get('disable_preprocessor', False):
        _log.warning(("User disabled metadata checks and unit conversion in "
                      "preprocessor."), tags=util.ObjectLogTag.BANNER)
    if cli_obj.config.get('overwrite_file_metadata', False):
        _log.warning(("User chose to overwrite input file metadata with "
                      "framework values (convention = '%s')."),
                     cli_obj.config.get('convention', ''),
                     tags=util.ObjectLogTag.BANNER)
    # check this here, otherwise error raised about missing caselist is not informative
    try:
        if cli_obj.config.get('CASE_ROOT_DIR', ''):
            util.check_dir(cli_obj.config['CASE_ROOT_DIR'], 'CASE_ROOT_DIR',
                           create=False)
    except Exception as exc:
        _log.fatal(f"Mis-specified input for CASE_ROOT_DIR (received "
                   f"'{cli_obj.config.get('CASE_ROOT_DIR', '')}', caught {repr(exc)}.)")
        util.exit_handler(code=1)
if 'CASE_ROOT_DIR' not in cli_d and d.get('root_dir', None): # CASE_ROOT was set positionally cli_d['CASE_ROOT_DIR'] = d['root_dir'] case_list_in = [cli_d] else: case_list_in = util.to_iter(cli_obj.file_case_list) self.cases = dict() for i, case_d in enumerate(case_list_in): case = self.parse_case(i, case_d, cli_obj, pod_info_tuple) if case: self.cases[case['CASENAME']] = case if not self.cases: _log.critical(("No valid entries in case_list. Please specify " "model run information.\nReceived:" f"\n{util.pretty_print_json(case_list_in)}")) util.exit_handler(code=1) def verify_paths(self, config, p): # needs to be here, instead of PathManager, because we subclass it in # NOAA_GFDL keep_temp = config.get('keep_temp', False) # clean out WORKING_DIR if we're not keeping temp files: if os.path.exists(p.WORKING_DIR) and not \ (keep_temp or p.WORKING_DIR == p.OUTPUT_DIR): shutil.rmtree(p.WORKING_DIR) try: for dir_name, create_ in (('CODE_ROOT', False), ('OBS_DATA_ROOT', False), ('MODEL_DATA_ROOT', True), ('WORKING_DIR', True)):
def load_pod_settings(code_root, pod=None, pod_list=None):
    """Wrapper to load and parse the contents of POD settings files, used by
    :class:`~src.core.MDTFFramework` and :class:`InfoCLIHandler`.

    Args:
        code_root (str): Absolute path to the directory containing the
            ``diagnostics/`` subdirectory of POD code.
        pod (str, optional): If given, load and return only this POD's
            settings; the special value ``'list'`` returns the POD name list.
        pod_list (list, optional): List of POD names to load settings files
            for; defaults to all PODs found under ``diagnostics/``.

    Raises:
        :class:`~src.util.PodConfigError`: If an error is raised opening or
            parsing the contents of a settings file. In normal operation,
            this is treated as a fatal error and will cause package exit.

    Returns:
        Instance of :data:`PodDataTuple`.
    """
    _pod_dir = 'diagnostics'
    _file_name = 'settings.jsonc'

    def _load_one_json(pod_):
        # read and sanity-check one settings.jsonc, translating every failure
        # mode into a PodConfigError with a specific explanation
        pod_dir = os.path.join(code_root, _pod_dir, pod_)
        settings_path = os.path.join(pod_dir, _file_name)
        try:
            d = util.read_json(settings_path)
            for section in ('settings', 'varlist'):
                if section not in d:
                    raise AssertionError(
                        f"'{section}' entry not found in '{_file_name}'.")
        except util.MDTFFileNotFoundError as exc:
            if not os.path.isdir(pod_dir):
                raise util.PodConfigError(
                    (f"'{pod_}' directory not found in "
                     f"'{os.path.join(code_root, _pod_dir)}'."), pod_)
            elif not os.path.isfile(settings_path):
                raise util.PodConfigError((f"'{_file_name}' file not found in "
                                           f"'{pod_dir}'."), pod_)
            else:
                raise exc
        except (JSONDecodeError, AssertionError) as exc:
            raise util.PodConfigError((f"Syntax error in '{_file_name}': "
                                       f"{str(exc)}."), pod_)
        except Exception as exc:
            raise util.PodConfigError(
                (f"Error encountered in reading '{_file_name}': "
                 f"{repr(exc)}."), pod_)
        return d

    # get list of pods
    if not pod_list:
        pod_list = os.listdir(os.path.join(code_root, _pod_dir))
        pod_list = [s for s in pod_list if not s.startswith(('_', '.'))]
        pod_list.sort(key=str.lower)
    if pod == 'list':
        return pod_list
    # load one settings.jsonc file
    if pod is not None:
        if pod not in pod_list:
            print(f"Couldn't recognize '{pod}' out of the following diagnostics:")
            print(', '.join(pod_list))
            return dict()
        return _load_one_json(pod)
    # load all of them
    pods = dict()
    realm_list = set()
    bad_pods = []
    realms = collections.defaultdict(list)
    for p in pod_list:
        try:
            d = _load_one_json(p)
        except Exception as exc:
            _log.error(exc)
            bad_pods.append(p)
            continue
        pods[p] = d
        # PODs requiring data from multiple realms get stored in the dict
        # under a tuple of those realms; realms stored individually in realm_list
        realm_key = util.to_iter(d['settings'].get('realm', None), tuple)
        if not realm_key:
            continue
        if len(realm_key) == 1:
            realm_key = realm_key[0]
            realm_list.add(realm_key)
        else:
            realm_list.update(realm_key)
        realms[realm_key].append(p)
    if bad_pods:
        _log.critical(
            ("Errors were encountered when finding the following PODS: "
             "[%s]."), ', '.join(f"'{p}'" for p in bad_pods))
        util.exit_handler(code=1)
    return PodDataTuple(
        pod_data=pods, realm_data=realms, sorted_pods=pod_list,
        sorted_realms=sorted(list(realm_list), key=str.lower)
    )
def __post_init__(self, log=_log, model=None, experiment=None):
    """Validate user input and fill in unset CMIP6 DRS attributes.

    Uses CMIP6 controlled-vocabulary lookups to infer related attributes,
    verifies CASE_ROOT_DIR exists, and sets CATALOG_DIR as far down the DRS
    directory hierarchy as the known attributes allow.
    """
    super(CMIP6DataSourceAttributes, self).__post_init__(log=log)
    config = core.ConfigManager()
    cv = cmip6.CMIP6_CVs()

    def _init_x_from_y(source, dest):
        # If attribute `dest` is unset, try to derive it from `source` via a
        # CV lookup; on any failure (unset source, lookup KeyError) leave
        # `dest` set to "". source_val is always bound before the handler
        # runs, since both KeyErrors are raised after its assignment.
        if not getattr(self, dest, ""):
            try:
                source_val = getattr(self, source, "")
                if not source_val:
                    raise KeyError()
                dest_val = cv.lookup_single(source_val, source, dest)
                log.debug("Set %s='%s' based on %s='%s'.",
                          dest, dest_val, source, source_val)
                setattr(self, dest, dest_val)
            except KeyError:
                log.debug("Couldn't set %s from %s='%s'.",
                          dest, source, source_val)
                setattr(self, dest, "")

    if not self.CASE_ROOT_DIR and config.CASE_ROOT_DIR:
        log.debug("Using global CASE_ROOT_DIR = '%s'.", config.CASE_ROOT_DIR)
        self.CASE_ROOT_DIR = config.CASE_ROOT_DIR
    # verify case root dir exists
    if not os.path.isdir(self.CASE_ROOT_DIR):
        log.critical("Data directory CASE_ROOT_DIR = '%s' not found.",
                     self.CASE_ROOT_DIR)
        util.exit_handler(code=1)

    # should really fix this at the level of CLI flag synonyms
    if model and not self.source_id:
        self.source_id = model
    if experiment and not self.experiment_id:
        self.experiment_id = experiment

    # validate non-empty field values against the CMIP6 CV; a bad value is
    # only logged as an error, not fatal
    for field in dataclasses.fields(self):
        val = getattr(self, field.name, "")
        if not val:
            continue
        try:
            if not cv.is_in_cv(field.name, val):
                log.error((
                    "Supplied value '%s' for '%s' is not recognized by "
                    "the CMIP6 CV. Continuing, but queries will probably fail."
                ), val, field.name)
        except KeyError:
            # raised if not a valid CMIP6 CV category
            continue
    # currently no inter-field consistency checks: happens implicitly, since
    # set_experiment will find zero experiments.

    # Attempt to determine first few fields of DRS, to avoid having to crawl
    # entire DRS structure
    _init_x_from_y('experiment_id', 'activity_id')
    _init_x_from_y('source_id', 'institution_id')
    _init_x_from_y('institution_id', 'source_id')
    # TODO: multi-column lookups
    # set CATALOG_DIR to be further down the hierarchy if possible, to
    # avoid having to crawl entire DRS structure; CASE_ROOT_DIR remains the
    # root of the DRS hierarchy
    new_root = self.CASE_ROOT_DIR
    for drs_attr in ("activity_id", "institution_id",
                     "source_id", "experiment_id"):
        drs_val = getattr(self, drs_attr, "")
        if not drs_val:
            # stop at the first unknown DRS level; deeper levels can't be joined
            break
        new_root = os.path.join(new_root, drs_val)
    if not os.path.isdir(new_root):
        log.error("Data directory '%s' not found; starting crawl at '%s'.",
                  new_root, self.CASE_ROOT_DIR)
        self.CATALOG_DIR = self.CASE_ROOT_DIR
    else:
        self.CATALOG_DIR = new_root
def load_pod_settings(code_root, pod=None, pod_list=None):
    """Wrapper to load POD settings files, used by ConfigManager and
    CLIInfoHandler.
    """
    # only place we can put it would be util.py if we want to avoid circular imports
    _pod_dir = 'diagnostics'
    _file_name = 'settings.jsonc'

    def _load_one_json(pod_):
        # parse one settings.jsonc; every failure mode becomes a
        # PodConfigError with a specific explanation
        pod_dir = os.path.join(code_root, _pod_dir, pod_)
        settings_path = os.path.join(pod_dir, _file_name)
        try:
            d = util.read_json(settings_path)
            for section in ['settings', 'varlist']:
                if section not in d:
                    raise AssertionError(
                        f"'{section}' entry not found in '{_file_name}'.")
        except util.MDTFFileNotFoundError as exc:
            if not os.path.isdir(pod_dir):
                raise util.PodConfigError(
                    (f"'{pod_}' directory not found in "
                     f"'{os.path.join(code_root, _pod_dir)}'."), pod_)
            elif not os.path.isfile(settings_path):
                raise util.PodConfigError((f"'{_file_name}' file not found in "
                                           f"'{pod_dir}'."), pod_)
            else:
                raise exc
        except (JSONDecodeError, AssertionError) as exc:
            raise util.PodConfigError((f"Syntax error in '{_file_name}': "
                                       f"{str(exc)}."), pod_)
        except Exception as exc:
            raise util.PodConfigError(
                (f"Error encountered in reading '{_file_name}': "
                 f"{repr(exc)}."), pod_)
        return d

    # default to every installed POD, case-insensitively sorted
    if not pod_list:
        pod_list = sorted(
            (s for s in os.listdir(os.path.join(code_root, _pod_dir))
             if not s.startswith(('_', '.'))),
            key=str.lower
        )
    if pod == 'list':
        return pod_list
    if pod is not None:
        # load a single settings.jsonc file
        if pod not in pod_list:
            print(f"Couldn't recognize '{pod}' out of the following diagnostics:")
            print(', '.join(pod_list))
            return dict()
        return _load_one_json(pod)
    # otherwise load all of them
    pods = dict()
    realm_list = set()
    bad_pods = []
    realms = collections.defaultdict(list)
    for pod_name in pod_list:
        try:
            settings = _load_one_json(pod_name)
        except Exception as exc:
            _log.error(exc)
            bad_pods.append(pod_name)
            continue
        pods[pod_name] = settings
        # PODs requiring data from multiple realms get stored in the dict
        # under a tuple of those realms; realms stored individually in realm_list
        realm_t = util.to_iter(settings['settings'].get('realm', None), tuple)
        if len(realm_t) == 0:
            continue
        elif len(realm_t) == 1:
            realm_t = realm_t[0]
            realm_list.add(realm_t)
        else:
            realm_list.update(realm_t)
        realms[realm_t].append(pod_name)
    if bad_pods:
        _log.critical(
            ("Errors were encountered when finding the following PODS: "
             "[%s]."), ', '.join(f"'{p}'" for p in bad_pods))
        util.exit_handler(code=1)
    return PodDataTuple(
        pod_data=pods, realm_data=realms, sorted_pods=pod_list,
        sorted_realms=sorted(list(realm_list), key=str.lower)
    )