Example #1
    def prepare_runconfig(self, job_params):
        """
        To prepare the final completed runconfig context.json which will be fed in
        to the pge
        :return: dict
        """
        logger.debug("Preparing runconfig for {}".format(
            self._pge_config.get('pge_name')))
        empty_field_identifier = self._pge_config.get(
            ChimeraConstants.EMPTY_FIELD_IDENTIFIER, EMPTY_FIELD_IDENTIFIER)
        logger.debug(
            "Empty field identifier: {}".format(empty_field_identifier))
        output_context = dict()
        optional_fields = self._pge_config.get(
            ChimeraConstants.OPTIONAL_FIELDS, [])
        if self._pge_config.get(ChimeraConstants.RUNCONFIG):
            output_context = copy.deepcopy(
                self._pge_config.get(ChimeraConstants.RUNCONFIG))
            matched_keys = self.repl_val_in_dict(
                output_context,
                empty_field_identifier,
                job_params,
                optional_fields=optional_fields)
        else:
            raise KeyError("Key runconfig not found in PGE config file")

        # Add localized urls
        output_context[ChimeraConstants.LOCALIZE] = self.localize_paths(
            output_context)
        output_context[ChimeraConstants.SIMULATE_OUTPUTS] = self._settings[
            ChimeraConstants.PGE_SIM_MODE]

        return output_context
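For context, a minimal sketch of the substitution repl_val_in_dict presumably performs: walk the runconfig dict and replace any value equal to the empty-field identifier with the matching entry from job_params, treating missing optional fields as non-fatal. This is an illustrative assumption, not the actual Chimera implementation; the placeholder token is hypothetical.

import json

EMPTY_FIELD_IDENTIFIER = "__CHIMERA_VAL__"  # hypothetical placeholder token

def repl_val_in_dict_sketch(d, identifier, job_params, optional_fields=()):
    """Recursively replace placeholder values with entries from job_params."""
    matched = []
    for key, value in d.items():
        if isinstance(value, dict):
            matched += repl_val_in_dict_sketch(value, identifier, job_params,
                                               optional_fields)
        elif value == identifier:
            if key in job_params:
                d[key] = job_params[key]
                matched.append(key)
            elif key not in optional_fields:
                raise ValueError(
                    "No value found for mandatory field {}".format(key))
    return matched

runconfig = {"input": {"InputFilePath": EMPTY_FIELD_IDENTIFIER}}
repl_val_in_dict_sketch(runconfig, EMPTY_FIELD_IDENTIFIER,
                        {"InputFilePath": "s3://bucket/file.h5"})
print(json.dumps(runconfig))  # {"input": {"InputFilePath": "s3://bucket/file.h5"}}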
Example #2
    def prepare_psuedo_context(self, psuedo_context):
        """
        Write the gathered job and product metadata to the pseudo-context file.
        :return: str - path of the written pseudo-context file
        """
        logger.debug("Preparing psuedo_context file after {} run".format(
            self._pge_config.get("pge_name")))
        # Write out the job context; the with-block guarantees the file handle
        # is closed even if the write fails.
        psu_context_path = "{}_context.json".format(
            self._pge_config.get("pge_name"))
        with open(psu_context_path, "w") as psu_context:
            json.dump(psuedo_context, psu_context)
        return psu_context_path
Example #3
    def __init__(self,
                 context,
                 run_config,
                 pge_config_file,
                 settings_file,
                 wuid=None,
                 job_num=None):
        # load context file
        if isinstance(context, dict):
            self._context = context
            # With an in-memory context there is no file path to anchor to, so
            # fall back to the current working directory.
            self._base_work_dir = os.getcwd()
        elif isinstance(context, str):
            with open(context, "r") as f:
                self._context = json.load(f)
            # This is intended to represent the top-level working directory of
            # the job, assumed to be the directory containing the context file.
            self._base_work_dir = os.path.dirname(os.path.abspath(context))
        else:
            raise TypeError(
                "context must be a dict or a path to a JSON file, "
                "got {}".format(type(context)))
        logger.debug("Loaded context file: {}".format(
            json.dumps(self._context)))

        # load pge config file
        self._pge_config = load_config(pge_config_file)
        logger.debug("Loaded PGE config file: {}".format(
            json.dumps(self._pge_config)))

        self._wuid = wuid
        self._job_num = job_num

        # load Settings file
        try:
            if settings_file:
                settings_file = os.path.abspath(
                    os.path.normpath(settings_file))
                self._settings = YamlConf(settings_file).cfg
                self._chimera_config = self._settings.get("CHIMERA", None)
                if self._wuid is not None and self._job_num is not None:
                    if not self._chimera_config:
                        raise RuntimeError(
                            "Must specify a CHIMERA area in {}".format(
                                settings_file))
        except Exception as e:
            if settings_file:
                file_name = settings_file
            else:
                file_name = '~/verdi/etc/settings.yaml'
            raise RuntimeError("Could not read settings file '{}': {}".format(
                file_name, e))

        self._run_config = run_config
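For illustration, a sketch of the settings structure this constructor expects: a YAML file with a CHIMERA section. YamlConf is presumably a thin PyYAML wrapper, the JOB_TYPES/JOB_QUEUES keys are assumptions inferred from how Example #7 reads the parsed config, and all values are hypothetical.

import yaml  # assuming PyYAML

settings_yaml = """
CHIMERA:
  JOB_TYPES:
    L0B: job-SCIFLO_L0B
  JOB_QUEUES:
    L0B: factotum-job_worker-small
PGE_SIMULATION_MODE: false  # guessed key name for ChimeraConstants.PGE_SIM_MODE
"""
settings = yaml.safe_load(settings_yaml)
chimera_config = settings.get("CHIMERA")  # mirrors self._chimera_config above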
Example #4
    def __init__(self, sf_context, chimera_config_filepath,
                 pge_config_filepath, settings_file):
        # load context file
        if isinstance(sf_context, dict):
            self._sf_context = sf_context
        elif isinstance(sf_context, str):
            with open(sf_context, "r") as f:
                self._sf_context = json.load(f)
        else:
            raise TypeError(
                "sf_context must be a dict or a path to a JSON file, "
                "got {}".format(type(sf_context)))
        logger.debug("Loaded context file: {}".format(
            json.dumps(self._sf_context)))

        # load pge config file
        self._pge_config = load_config(pge_config_filepath)
        logger.debug("Loaded PGE config file: {}".format(
            json.dumps(self._pge_config)))

        # load IPP config file
        try:
            self._chimera_config = YamlConf(chimera_config_filepath).cfg
            self._module_path = self._chimera_config.get(
                "preprocessor", {}).get("module_path", None)
            if not self._module_path:
                raise RuntimeError(
                    "'module_path' must be defined in the 'preprocessor' section of the "
                    "Chimera Config file '{}'".format(chimera_config_filepath))
            self._class_name = self._chimera_config.get(
                "preprocessor", {}).get("class_name", None)
            if not self._class_name:
                raise RuntimeError(
                    "'class_name' must be defined in the 'preprocessor' section of the "
                    "Chimera Config file '{}'".format(chimera_config_filepath))
        except Exception as e:
            raise RuntimeError(
                "Could not read preconditions definition file : {}".format(e))

        # load Settings file
        try:
            if settings_file:
                settings_file = os.path.abspath(
                    os.path.normpath(settings_file))
                self._settings = YamlConf(settings_file).cfg
        except Exception as e:
            if settings_file:
                file_name = settings_file
            else:
                file_name = '~/verdi/etc/settings.yaml'
            raise RuntimeError("Could not read settings file '{}': {}".format(
                file_name, e))
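The module_path/class_name pair suggests the preprocessor class is resolved dynamically at run time. A minimal sketch of that lookup, assuming nothing beyond the standard library (the example values are placeholders, not the real Chimera module layout):

import importlib

def load_preprocessor(module_path, class_name):
    """Import module_path and return the named class from it."""
    module = importlib.import_module(module_path)
    return getattr(module, class_name)

# Hypothetical values; the real ones come from the 'preprocessor' section
# of the Chimera config file.
# precondition_cls = load_preprocessor("chimera.precondition_evaluator",
#                                      "PreConditionEvaluator")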
Example #5
    def wait_for_doc(self, endpoint, query, timeout):
        """
        This function executes the search query for specified wait time until
        document is found
        :param endpoint: GRQ or MOZART
        :param query: search query
        :param timeout: time to wait in seconds
        :return: True if document found else raise suitable Exception
        """
        try:
            result = self.query_es(
                endpoint=endpoint, query=query, request_timeout=30, size=1
            )
            slept_seconds = 0
            sleep_seconds = 2

            while self.wait_condition(endpoint=endpoint, result=result):
                if result.get("timed_out", True):
                    slept_seconds += 30

                if slept_seconds + sleep_seconds < timeout:
                    logger.debug("Slept for {} seconds".format(slept_seconds))
                    logger.debug("Sleeping for {} seconds".format(sleep_seconds))
                else:
                    sleep_seconds = timeout - slept_seconds
                    logger.debug("Slept for {} seconds".format(slept_seconds))
                    logger.debug(
                        "Sleeping for {} seconds to conform to timeout "
                        "of {} seconds".format(sleep_seconds, timeout)
                    )

                if slept_seconds >= timeout:
                    if len(result.get("hits").get("hits")) == 0:
                        raise Exception(
                            "{} ES taking too long to index document".format(endpoint)
                        )
                    if endpoint == self.MOZART_ES_ENDPOINT:
                        if (
                            str(result["hits"]["hits"][0]["_source"]["status"])
                            == "job-started"
                        ):
                            raise Exception(
                                "{} ES taking too long to update status of "
                                "job".format(endpoint)
                            )

                time.sleep(sleep_seconds)
                result = self.query_es(
                    endpoint=endpoint, query=query, request_timeout=30, size=1
                )
                slept_seconds += sleep_seconds
                sleep_seconds *= 2
            return True
        except Exception as e:
            raise Exception("ElasticSearch Operation failed due to : {}".format(str(e)))
Example #6
    def localize_paths(self, output_context):
        """
        To set file to localize in the docker
        :param output_context:
        """
        logger.debug("Preparing to localize file paths")

        # This check is deprecated because not all values in localize_groups
        # are remote URLs (for example, local SPS config file paths); values
        # without a recognized scheme are simply skipped.
        def is_url(val):
            parse_result = urlparse(val)
            schemes = [
                "s3", "s3s", "http", "https", "ftp", "sftp", "azure", "azures",
                "rsync"
            ]
            return parse_result.scheme in schemes

        localize_paths_list = []
        for group in self._pge_config.get(ChimeraConstants.LOCALIZE_GROUPS,
                                          []):
            for elem in output_context.get(group, []):
                value = output_context.get(group).get(elem)

                # The value may be a list; for example, some InputFileGroups
                # can be scalars or vectors.
                if isinstance(value, list):
                    for v in value:
                        if is_url(v):
                            localize_paths_list.append(v)

                elif isinstance(value, str):
                    if is_url(value):
                        localize_paths_list.append(value)

        return localize_paths_list
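A quick, self-contained check of the is_url helper's behavior on representative values (the sample paths are made up): bare filesystem paths have no URL scheme, which is why local SPS config files fall through to the skip branch.

from urllib.parse import urlparse

for val in ["s3://bucket/key/file.h5", "/data/work/sps_config.yaml",
            "https://example.com/f"]:
    print(val, "->", urlparse(val).scheme or "<no scheme>")
# s3://bucket/key/file.h5 -> s3
# /data/work/sps_config.yaml -> <no scheme>
# https://example.com/f -> https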
Example #7
    def submit_job(self):
        if not isinstance(self._run_config, dict):
            raise RuntimeError(
                "The output from input preprocessor is not a dictionary")

        params = self.construct_params()

        # If wuid and job_num are both set, job submission is implied, so we
        # need to construct the full job payload.
        if self._wuid is not None and self._job_num is not None:
            # get HySDS job type and queue information
            job_name = self._chimera_config.get(chimera_const.JOB_TYPES).get(
                self._pge_config.get(chimera_const.PGE_NAME))
            job_queue = self._chimera_config.get(chimera_const.JOB_QUEUES).get(
                self._pge_config.get(chimera_const.PGE_NAME))

            if chimera_const.RELEASE_VERSION in self._context:
                release_version = self._context[chimera_const.RELEASE_VERSION]
            else:
                release_version = self._context.get(
                    'container_specification').get('version')

            job_type = job_name + ":" + release_version

            localize_hash = self.get_payload_hash(job_type)

            # Find what the primary input is to the job
            # input_file_key = self._pge_config.get(chimera_const.PRIMARY_INPUT, None)
            # dataset_id = self.get_input_file_name(input_file_key)

            # Nominally, the primary input is used as part of the job name. To
            # use something else in the job name, specify the pge_job_name
            # field in the run_config.
            dataset_id = self._run_config.get("pge_job_name", None)

            if dataset_id:
                logger.info("dataset_id is set to {}".format(dataset_id))

            job_json = self.construct_job_payload(params,
                                                  dataset_id=dataset_id,
                                                  pge_config=self._pge_config,
                                                  job_type=job_type,
                                                  job_queue=job_queue,
                                                  payload_hash=localize_hash)
            # Set the sciflo fields wuid and job num
            # these are internally passed context information available in sciflo processes
            job_json['payload']['_sciflo_wuid'] = self._wuid
            job_json['payload']['_sciflo_job_num'] = self._job_num

            logger.debug("Resolved Job JSON: {}".format(json.dumps(job_json)))
        else:
            # If we're running inline, we will set the params as the job_json
            job_json = params
            # We also need the job_specification from _context.json, as it
            # contains dependency image information, if specified.
            if "job_specification" in self._context:
                job_json["job_specification"] = self._context[
                    "job_specification"]
        job_json = self.perform_adaptation_tasks(job_json)

        return job_json
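A tiny sketch of the job-type string assembled above; both values are hypothetical stand-ins for entries read from the settings and context files:

job_name = "job-SCIFLO_L0B"   # assumed entry from JOB_TYPES in settings
release_version = "v2.0.1"    # from release_version or container_specification
job_type = job_name + ":" + release_version
print(job_type)  # job-SCIFLO_L0B:v2.0.1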