def prepare_runconfig(self, job_params):
    """
    Prepare the final completed runconfig context.json that will be fed into the PGE.
    :return: dict
    """
    logger.debug("Preparing runconfig for {}".format(
        self._pge_config.get('pge_name')))
    empty_field_identifier = self._pge_config.get(
        ChimeraConstants.EMPTY_FIELD_IDENTIFIER, EMPTY_FIELD_IDENTIFIER)
    logger.debug(
        "Empty field identifier: {}".format(empty_field_identifier))
    output_context = dict()
    optional_fields = self._pge_config.get(
        ChimeraConstants.OPTIONAL_FIELDS, [])

    if self._pge_config.get(ChimeraConstants.RUNCONFIG):
        output_context = copy.deepcopy(
            self._pge_config.get(ChimeraConstants.RUNCONFIG))
        # Replace placeholder (empty-field identifier) values in the runconfig
        # template with matching values from job_params
        matched_keys = self.repl_val_in_dict(
            output_context, empty_field_identifier, job_params,
            optional_fields=optional_fields)
    else:
        raise KeyError("Key 'runconfig' not found in PGE config file")

    # Add localized urls
    output_context[ChimeraConstants.LOCALIZE] = self.localize_paths(
        output_context)
    output_context[ChimeraConstants.SIMULATE_OUTPUTS] = self._settings[
        ChimeraConstants.PGE_SIM_MODE]

    return output_context
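# Illustrative sketch only: the key names and placeholder value below are
# hypothetical, not taken from a real PGE config. They show the expected shape of
# the inputs to prepare_runconfig(): empty fields in the "runconfig" template are
# replaced with matching values from job_params, then the "localize" list and
# "simulate_outputs" flag are appended to the returned context.
#
#   pge_config = {
#       "pge_name": "EXAMPLE_PGE",
#       "empty_field_identifier": "__CHIMERA_VAL__",
#       "runconfig": {
#           "InputFilePath": "__CHIMERA_VAL__",
#           "ProductPath": "/data/output",
#       },
#   }
#   job_params = {"InputFilePath": ["s3://bucket/inputs/granule_001.h5"]}
#   # -> output_context["InputFilePath"] == ["s3://bucket/inputs/granule_001.h5"]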
def prepare_psuedo_context(self, psuedo_context):
    """
    Write the gathered job and product metadata information to the pseudo context file.
    :return: str: name of the written context file
    """
    logger.debug("Preparing psuedo_context file after {} run".format(
        self._pge_config.get("pge_name")))
    # write out job context
    psu_context_file = "{}_context.json".format(
        self._pge_config.get("pge_name"))
    with open(psu_context_file, "w") as psu_context:
        psu_context.write(json.dumps(psuedo_context))
    return psu_context_file
def __init__(self, context, run_config, pge_config_file, settings_file,
             wuid=None, job_num=None):
    # load context file
    if isinstance(context, dict):
        self._context = context
    elif isinstance(context, str):
        with open(context, 'r') as f:
            self._context = json.load(f)
        logger.debug("Loaded context file: {}".format(json.dumps(
            self._context)))

    # This is intended to represent the top level working directory of the job.
    # It's assumed to be at the same level as the given context file.
    self._base_work_dir = os.path.dirname(os.path.abspath(context))

    # load pge config file
    self._pge_config = load_config(pge_config_file)
    logger.debug("Loaded PGE config file: {}".format(
        json.dumps(self._pge_config)))

    self._wuid = wuid
    self._job_num = job_num

    # load Settings file
    try:
        if settings_file:
            settings_file = os.path.abspath(
                os.path.normpath(settings_file))
            self._settings = YamlConf(settings_file).cfg
            self._chimera_config = self._settings.get("CHIMERA", None)
            if self._wuid and self._job_num is not None:
                if not self._chimera_config:
                    raise RuntimeError(
                        "Must specify a CHIMERA area in {}".format(
                            settings_file))
    except Exception as e:
        file_name = settings_file if settings_file else '~/verdi/etc/settings.yaml'
        raise RuntimeError("Could not read settings file '{}': {}".format(
            file_name, e))

    self._run_config = run_config
def __init__(self, sf_context, chimera_config_filepath, pge_config_filepath,
             settings_file):
    # load context file
    if isinstance(sf_context, dict):
        self._sf_context = sf_context
    elif isinstance(sf_context, str):
        with open(sf_context, 'r') as f:
            self._sf_context = json.load(f)
        logger.debug("Loaded context file: {}".format(
            json.dumps(self._sf_context)))

    # load pge config file
    self._pge_config = load_config(pge_config_filepath)
    logger.debug("Loaded PGE config file: {}".format(
        json.dumps(self._pge_config)))

    # load IPP config file
    try:
        self._chimera_config = YamlConf(chimera_config_filepath).cfg
        self._module_path = self._chimera_config.get(
            "preprocessor", {}).get("module_path", None)
        if not self._module_path:
            raise RuntimeError(
                "'module_path' must be defined in the 'preprocessor' section of the "
                "Chimera Config file '{}'".format(chimera_config_filepath))
        self._class_name = self._chimera_config.get(
            "preprocessor", {}).get("class_name", None)
        if not self._class_name:
            raise RuntimeError(
                "'class_name' must be defined in the 'preprocessor' section of the "
                "Chimera Config file '{}'".format(chimera_config_filepath))
    except Exception as e:
        raise RuntimeError(
            "Could not read preconditions definition file: {}".format(e))

    # load Settings file
    try:
        if settings_file:
            settings_file = os.path.abspath(
                os.path.normpath(settings_file))
            self._settings = YamlConf(settings_file).cfg
    except Exception as e:
        file_name = settings_file if settings_file else '~/verdi/etc/settings.yaml'
        raise RuntimeError("Could not read settings file '{}': {}".format(
            file_name, e))
def wait_for_doc(self, endpoint, query, timeout):
    """
    Execute the search query against the specified endpoint, retrying until the
    document is found or the wait time is exhausted.
    :param endpoint: GRQ or MOZART
    :param query: search query
    :param timeout: time to wait in seconds
    :return: True if document found, else raise a suitable Exception
    """
    try:
        result = self.query_es(
            endpoint=endpoint, query=query, request_timeout=30, size=1)
        slept_seconds = 0
        sleep_seconds = 2

        while self.wait_condition(endpoint=endpoint, result=result):
            if result.get("timed_out", True):
                # account for the 30 second request timeout of the query itself
                slept_seconds += 30

            if slept_seconds + sleep_seconds < timeout:
                logger.debug("Slept for {} seconds".format(slept_seconds))
                logger.debug("Sleeping for {} seconds".format(sleep_seconds))
            else:
                # clamp the final sleep so the total wait respects the timeout
                sleep_seconds = timeout - slept_seconds
                logger.debug("Slept for {} seconds".format(slept_seconds))
                logger.debug(
                    "Sleeping for {} seconds to conform to timeout "
                    "of {} seconds".format(sleep_seconds, timeout))

            if slept_seconds >= timeout:
                if len(result.get("hits").get("hits")) == 0:
                    raise Exception(
                        "{} ES taking too long to index document".format(endpoint))
                if endpoint == self.MOZART_ES_ENDPOINT:
                    if str(result["hits"]["hits"][0]["_source"]["status"]) == "job-started":
                        raise Exception(
                            "{} ES taking too long to update status of "
                            "job".format(endpoint))

            time.sleep(sleep_seconds)
            result = self.query_es(
                endpoint=endpoint, query=query, request_timeout=30, size=1)
            slept_seconds += sleep_seconds
            # exponential backoff between polls
            sleep_seconds *= 2

        return True
    except Exception as e:
        raise Exception("ElasticSearch operation failed due to: {}".format(str(e)))
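# The polling above backs off exponentially: it sleeps 2, 4, 8, 16, ... seconds
# between queries, with the last sleep clamped to (timeout - slept_seconds) so the
# total wait stays close to the caller's timeout. Illustrative call only; the
# query body and the "GRQ_ES_ENDPOINT" attribute name are assumptions, not taken
# from this code:
#
#   query = {"query": {"term": {"_id": "my-dataset-id"}}}
#   es_util.wait_for_doc(endpoint=es_util.GRQ_ES_ENDPOINT, query=query, timeout=300)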
def localize_paths(self, output_context):
    """
    Collect the file paths from the output context that need to be localized
    into the docker container.
    :param output_context:
    """
    logger.debug("Preparing to localize file paths")

    # Deprecated function since not all values in localize_groups are on s3,
    # for example SPS config files
    def is_url(val):
        parse_result = urlparse(val)
        schemes = ["s3", "s3s", "http", "https", "ftp", "sftp", "azure",
                   "azures", "rsync"]
        return parse_result.scheme in schemes

    localize_paths_list = []
    for group in self._pge_config.get(ChimeraConstants.LOCALIZE_GROUPS, []):
        for elem in output_context.get(group, []):
            value = output_context.get(group).get(elem)
            # The value may be a list; for example, some InputFileGroups can be
            # scalars or vectors
            if isinstance(value, list):
                for v in value:
                    if is_url(v):
                        localize_paths_list.append(v)
            elif isinstance(value, str):
                if is_url(value):
                    localize_paths_list.append(value)
            else:
                continue

    return localize_paths_list
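# Illustrative only: a hypothetical localize_groups/output_context pair showing
# what localize_paths() collects. The group and key names here are made up.
#
#   pge_config["localize_groups"] = ["InputFileGroup"]
#   output_context["InputFileGroup"] = {
#       "InputFilePath": ["s3://bucket/inputs/granule_001.h5"],  # URL -> localized
#       "AncillaryFilePath": "/local/static/ancillary.xml",      # local path -> skipped
#   }
#   # localize_paths(output_context) -> ["s3://bucket/inputs/granule_001.h5"]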
def submit_job(self):
    if not isinstance(self._run_config, dict):
        raise RuntimeError(
            "The output from input preprocessor is not a dictionary")

    params = self.construct_params()

    # If wuid and job_num are not null, it is implied that we need to do job
    # submission. In that case, we need to construct the job payload.
    if self._wuid and self._job_num is not None:
        # get HySDS job type and queue information
        job_name = self._chimera_config.get(chimera_const.JOB_TYPES).get(
            self._pge_config.get(chimera_const.PGE_NAME))
        job_queue = self._chimera_config.get(chimera_const.JOB_QUEUES).get(
            self._pge_config.get(chimera_const.PGE_NAME))

        if chimera_const.RELEASE_VERSION in self._context:
            release_version = self._context[chimera_const.RELEASE_VERSION]
        else:
            release_version = self._context.get(
                'container_specification').get('version')
        job_type = job_name + ":" + release_version

        localize_hash = self.get_payload_hash(job_type)

        # Find what the primary input is to the job
        # input_file_key = self._pge_config.get(chimera_const.PRIMARY_INPUT, None)
        # dataset_id = self.get_input_file_name(input_file_key)

        # Nominally, the primary input is used as part of the job name. If we
        # wanted to set something else in the job name, look to see if the
        # pge_job_name field is specified in the run_config
        dataset_id = self._run_config.get("pge_job_name", None)
        if dataset_id:
            logger.info("dataset_id is set to {}".format(dataset_id))

        job_json = self.construct_job_payload(
            params, dataset_id=dataset_id, pge_config=self._pge_config,
            job_type=job_type, job_queue=job_queue,
            payload_hash=localize_hash)

        # Set the sciflo fields wuid and job_num; these are internally passed
        # context information available in sciflo processes
        job_json['payload']['_sciflo_wuid'] = self._wuid
        job_json['payload']['_sciflo_job_num'] = self._job_num

        logger.debug("Resolved Job JSON: {}".format(json.dumps(job_json)))
    else:
        # If we're running inline, we will set the params as the job_json
        job_json = params
        # We also need to get the job_specification from _context.json as that
        # contains dependency image information, if specified
        if "job_specification" in self._context:
            job_json["job_specification"] = self._context["job_specification"]

    job_json = self.perform_adaptation_tasks(job_json)

    return job_json
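# Minimal usage sketch, assuming a job-submitter class (here called
# "PgeJobSubmitter") that defines the __init__ and methods above; the file paths
# and wiring shown are hypothetical.
#
#   submitter = PgeJobSubmitter(
#       context="_context.json",
#       run_config=run_config,            # dict produced by the input preprocessor
#       pge_config_file="config/pge_config.yaml",
#       settings_file="~/verdi/etc/settings.yaml",
#       wuid=wuid,                        # set by sciflo; None when running inline
#       job_num=job_num,
#   )
#   job_json = submitter.submit_job()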