Пример #1
0
    def update(self, options, saveconf, tag=None):
        """
        Validate the given options against the cab parameters and save them.

        options   :   mapping of parameter names (or their mapping aliases) to values
        saveconf  :   destination handed to utils.writeJson for the validated configuration
        tag       :   tag shown in the log message; falls back to self.tag

        Raises StimelaCabParameterError when a required parameter is missing, an
        option matches no parameter, an io location is unknown or its folder is
        not set, a checked input file does not exist, or a value has the wrong
        type for an io / non-io parameter.
        """
        all_params = self.parameters
        tag = tag or self.tag

        # every required parameter must be supplied, under its name or its mapping
        for needed in all_params:
            if not needed.required:
                continue
            if not (needed.name in options or needed.mapping in options):
                raise StimelaCabParameterError(
                    "Parameter {} is required but has not been specified".format(needed.name))
        self.log.info(f"Validating parameters for cab {self.task} ({self.base}:{tag})")

        for name, value in options.items():
            matched = False
            for param in self.parameters:
                if name not in (param.name, param.mapping):
                    continue
                matched = True
                if param.deprecated:
                    self.log.warning(f"Parameter {name} for cab {self.task} is deprecated, and will be removed in a future release")

                # plain (non-io) parameters are validated and stored untouched
                if not param.io:
                    if isinstance(value, pathformatter):
                        raise StimelaCabParameterError("Path formatter type specified, but {} is not io".format(param.name))

                    self.log.debug(
                        "Validating parameter {}".format(param.name))
                    param.validate(value)
                    param.value = value
                    continue

                # io parameter: an explicit None leaves the parameter as it is
                if value is None:
                    continue
                param.validate(value)
                param.value = []
                # the normalised list stays bound to `value` for any later
                # parameter that matches the same option name
                if not isinstance(value, (list, tuple)):
                    value = [value]

                for item in value:
                    if isinstance(item, pathformatter):
                        if param.check_io:
                            raise StimelaCabParameterError("Pathformatters cannot be used on io parameters where io has to be checked")
                        rendered = ""
                        # calling the formatter yields the list of strings and placeholders
                        for piece in item():
                            if not isinstance(piece, placeholder):
                                rendered += piece
                                continue
                            if piece() not in IODEST.keys():
                                raise StimelaCabParameterError('The location \'{0}\' specified for parameter \'{1}\', is unknown. Choices are {2}'.format(
                                    piece(), param.name, IODEST.keys()))
                            location = piece()
                            if location == "input":
                                if self.indir is None:
                                    raise StimelaCabParameterError(
                                        "You have specified input files, but have not specified an input folder")
                            elif location == "msfile":
                                if self.msdir is None:
                                    raise StimelaCabParameterError(
                                        "You have specified MS files, but have not specified an MS folder")
                            elif self.outdir is None:
                                raise StimelaCabParameterError(
                                    "You have specified output files, but have not specified an output folder")
                            # every placeholder expands to its IODEST entry plus a slash
                            rendered += "{0}/".format(IODEST[location])

                        param.value.append(rendered)

                    elif isinstance(item, str):
                        # "<file>:<location>" overrides the parameter's default location
                        parts = item.split(":")
                        if len(parts) == 2:
                            if parts[1] not in IODEST.keys():
                                raise StimelaCabParameterError('The location \'{0}\' specified for parameter \'{1}\', is unknown. Choices are {2}'.format(
                                    parts[1], param.name, IODEST.keys()))
                            self.log.info("Location of '{0}' was specified as '{1}'. Will overide default.".format(
                                param.name, parts[1]))
                            item, location = parts
                        else:
                            location = param.io

                        if location in ("input", "msfile"):
                            if location == "input":
                                if self.indir is None:
                                    raise StimelaCabParameterError(
                                        "You have specified input files, but have not specified an input folder")
                                host_dir = self.indir
                            else:
                                if self.msdir is None:
                                    raise StimelaCabParameterError(
                                        "You have specified MS files, but have not specified an MS folder")
                                host_dir = self.msdir

                            # existence is checked against the indir/msdir folder,
                            # while the stored value uses the IODEST entry
                            path = "{0}/{1}".format(host_dir, item)
                            if param.check_io and not os.path.exists(path):
                                raise StimelaCabParameterError("File '{0}' for parameter '{1}' could not be located at '{2}'.".format(
                                    item, param.name, path))
                        elif self.outdir is None:
                            raise StimelaCabParameterError(
                                "You have specified output files, but have not specified an output folder")

                        param.value.append(
                            "{0}/{1}".format(IODEST[location], item))

                    else:
                        raise StimelaCabParameterError("io parameter must either be a pathformatter object or a string")

                # a single entry is stored as a scalar rather than a one-item list
                if len(param.value) == 1:
                    param.value = param.value[0]

            if not matched:
                raise StimelaCabParameterError(
                    "Parameter {0} is unknown. Run 'stimela cabs -i {1}' to get help on this cab".format(name, self.task))

        utils.writeJson(saveconf, dict(self.toDict()))
        self.log.info(f"Parameters validated and saved to {saveconf}")
Пример #2
0
    def run(self, steps=None, resume=False, redo=None):
        """
        Run a Stimela recipe.

        steps   :   recipe steps to run, either 1-based step numbers or job labels
                    (the part of the label before '::'). A value without __iter__
                    (e.g. None) selects every job; an empty selection runs nothing
        resume  :   no longer supported; logs an error and raises SystemExit
        redo    :   no longer supported; logs an error and raises SystemExit

        Returns 0 when all selected jobs finish. Raises StimelaCabParameterError
        for an unknown step label and PipelineException when a job fails.
        """

        recipe = {
            "name":   self.name,
            "steps":   []
        }

        if redo:
            self.log.error("This feature has been depricated")
            raise SystemExit

        elif resume:
            #TODO(sphe) Need to re-think how best to do this
            self.log.error("This feature has been depricated")
            raise SystemExit

        if getattr(steps, '__iter__', False):
            # materialise once so the selection can be indexed and iterated safely
            steps = list(steps)
            # guard against an empty selection, which has no first element to inspect
            if steps and isinstance(steps[0], str):
                labels = [job.label.split('::')[0] for job in self.jobs]
                _steps = []
                for step in steps:
                    try:
                        _steps.append(labels.index(step)+1)
                    except ValueError:
                        raise StimelaCabParameterError(
                            'Recipe label ID [{0}] doesn\'t exist'.format(step))
                steps = _steps
        else:
            steps = range(1, len(self.jobs)+1)
        jobs = [(step, self.jobs[step-1]) for step in steps]

        # TIMESTR = "%Y-%m-%d %H:%M:%S"
        # TIMESTR = "%H:%M:%S"
        for i, (step, job) in enumerate(jobs):
            start_time = datetime.now()
            job.log.info('job started at {}'.format(start_time),
                          # the extra attributes are filtered by e.g. the CARACal logger
                          extra=dict(stimela_job_state=(job.name, "running")))

            self.log.info('STEP {0} :: {1}'.format(i+1, job.label))
            self.active = job
            # True once run_job() returned and the job was not declared a failure
            job_ran = False
            try:
                with open(job.logfile, 'a') as astd:
                    astd.write('\n-----------------------------------\n')
                    astd.write(
                        'Stimela version     : {}\n'.format(version))
                    astd.write(
                        'Cab name            : {}\n'.format(job.image))
                    astd.write('-------------------------------------\n')
                job.run_job()
                # raise exception if wranglers declared the job a failure
                if job.declare_status is False:
                    raise StimelaRecipeExecutionError("job declared as failed")
                job_ran = True

                self.log2recipe(job, recipe, step, 'completed')
                self.completed.append(job)

                finished_time = datetime.now()
                job.log.info('job complete at {} after {}'.format(finished_time, finished_time-start_time),
                              # the extra attributes are filtered by e.g. the CARACal logger
                              extra=dict(stimela_job_state=(job.name, "complete")))

            except (utils.StimelaCabRuntimeError,
                    StimelaRecipeExecutionError,
                    StimelaCabParameterError) as exc:
                # ignore exceptions if wranglers declared the job a success
                if job.declare_status is True:
                    # the job counts as complete, so record it like any other
                    # completed job (unless the normal path already started to)
                    if not job_ran:
                        self.log2recipe(job, recipe, step, 'completed')
                        self.completed.append(job)
                    finished_time = datetime.now()
                    job.log.info('job complete (declared successful) at {} after {}'.format(finished_time, finished_time - start_time),
                                 # the extra attributes are filtered by e.g. the CARACal logger
                                 extra=dict(stimela_job_state=(job.name, "complete")))
                    continue

                self.remaining = [jb[1] for jb in jobs[i+1:]]
                self.failed = job

                finished_time = datetime.now()
                job.log.error(str(exc), extra=dict(stimela_job_state=(job.name, "failed"), boldface=True))
                job.log.error('job failed at {} after {}'.format(finished_time, finished_time-start_time),
                                extra=dict(stimela_job_state=(job.name, "failed"), color=None))
                for line in traceback.format_exc().splitlines():
                    job.log.error(line, extra=dict(traceback_report=True))

                self.log.info('Completed jobs : {}'.format(
                    [c.name for c in self.completed]))
                self.log.info('Remaining jobs : {}'.format(
                    [c.name for c in self.remaining]))

                self.log2recipe(job, recipe, step, 'failed')
                for pending_step, jb in jobs[i+1:]:
                    self.log.info(
                        'Logging remaining task: {}'.format(jb.label))
                    self.log2recipe(jb, recipe, pending_step, 'remaining')

                self.log.info(
                    'Saving pipeline information in {}'.format(self.resume_file))
                utils.writeJson(self.resume_file, recipe)

                # raise pipeline exception. Original exception context is discarded by "from None" (since we've already
                # logged it above, we don't need to include it with the new exception)
                raise PipelineException(exc, self.completed, job, self.remaining) from None

        self.log.info(
            'Saving pipeline information in {}'.format(self.resume_file))
        utils.writeJson(self.resume_file, recipe)
        self.log.info('Recipe executed successfully')

        return 0
Пример #3
0
    def run(self, steps=None, resume=False, redo=None):
        """
        Run a Stimela recipe.

        steps   :   recipe steps to run, either 1-based step numbers or job labels
                    (the part of the label before '::'). A value without __iter__
                    (e.g. None) selects every job; an empty selection runs nothing
        resume  :   resume recipe from last run (read from self.resume_file)
        redo    :   Re-run an old recipe from a .last file

        Returns 0 when all selected jobs finish. Raises StimelaRecipeExecutionError
        when the recipe cannot be recreated or resumed, StimelaCabParameterError
        for an unknown step label and PipelineException when a job fails. When
        resuming and nothing is left to run, the process exits via sys.exit(0).
        """

        recipe = {
            "name":   self.name,
            "steps":   []
        }

        if redo:
            recipe = utils.readJson(redo)
            self.log.info('Rerunning recipe {0} from {1}'.format(
                recipe['name'], redo))
            self.log.info('Recreating recipe instance..')
            self.jobs = []
            for step in recipe['steps']:

                #        add I/O folders to the json file
                #        add a string describing the contents of these folders
                #        The user has to ensure that these folders exist, and have the required content
                if step['jtype'] == 'docker':
                    self.log.info('Adding job \'{0}\' to recipe. The container will be named \'{1}\''.format(
                        step['cab'], step['name']))
                    cont = docker.Container(step['cab'], step['name'],
                                            label=step['label'], logger=self.log,
                                            shared_memory=step['shared_memory'],
                                            workdir=WORKDIR)

                    self.log.debug('Adding volumes {0} and environmental variables {1}'.format(
                        step['volumes'], step['environs']))
                    cont.volumes = step['volumes']
                    cont.environs = step['environs']
                    cont.shared_memory = step['shared_memory']
                    cont.input_content = step['input_content']
                    cont.msdir_content = step['msdir_content']
                    cont.logfile = step['logfile']
                    job = StimelaJob(
                        step['name'], recipe=self, label=step['label'], cabpath=self.cabpath)
                    job.job = cont
                    job.jtype = 'docker'
                elif step['jtype'] == 'function':
                    name = step['name']
                    # the function is looked up by name among the caller's local variables
                    func = inspect.currentframe(
                    ).f_back.f_locals[step['function']]
                    job = StimelaJob(name, recipe=self, label=step['label'])
                    job.python_job(func, step['parameters'])
                    job.jtype = 'function'
                else:
                    # without this branch `job` would be unbound on the first step,
                    # or the previous step's job would be appended a second time
                    raise StimelaRecipeExecutionError(
                        "Cannot recreate recipe, unsupported job type '{0}'".format(step['jtype']))

                self.jobs.append(job)

        elif resume:
            self.log.info("Resuming recipe from last run.")
            try:
                recipe = utils.readJson(self.resume_file)
            except IOError:
                raise StimelaRecipeExecutionError(
                    "Cannot resume pipeline, resume file '{}' not found".format(self.resume_file))

            steps_ = recipe.pop('steps')
            recipe['steps'] = []
            _steps = []
            for step in steps_:
                if step['status'] == 'completed':
                    recipe['steps'].append(step)
                    continue

                label = step['label']
                number = step['number']

                # Check if the recipe flow has changed. A step number outside the
                # current job list also means the flow changed.
                if 1 <= number <= len(self.jobs) and label == self.jobs[number-1].label:
                    self.log.info(
                        'recipe step \'{0}\' is fit for re-execution. Label = {1}'.format(number, label))
                    _steps.append(number)
                else:
                    raise StimelaRecipeExecutionError(
                        'Recipe flow, or task scheduling has changed. Cannot resume recipe. Label = {0}'.format(label))

            # Check whether there are steps to resume
            if len(_steps) == 0:
                self.log.info(
                    'All the steps were completed. No steps to resume')
                sys.exit(0)
            steps = _steps

        if getattr(steps, '__iter__', False):
            # materialise once so the selection can be indexed and iterated safely
            steps = list(steps)
            # guard against an empty selection, which has no first element to inspect
            if steps and isinstance(steps[0], str):
                labels = [job.label.split('::')[0] for job in self.jobs]
                _steps = []
                for step in steps:
                    try:
                        _steps.append(labels.index(step)+1)
                    except ValueError:
                        raise StimelaCabParameterError(
                            'Recipe label ID [{0}] doesn\'t exist'.format(step))
                steps = _steps
        else:
            steps = range(1, len(self.jobs)+1)
        jobs = [(step, self.jobs[step-1]) for step in steps]

        # TIMESTR = "%Y-%m-%d %H:%M:%S"
        # TIMESTR = "%H:%M:%S"

        for i, (step, job) in enumerate(jobs):
            start_time = datetime.now()
            job.log.info('job started at {}'.format(start_time),
                          # the extra attributes are filtered by e.g. the CARACal logger
                          extra=dict(stimela_job_state=(job.name, "running")))

            self.log.info('STEP {0} :: {1}'.format(i+1, job.label))
            self.active = job
            try:
                with open(job.logfile, 'a') as astd:
                    astd.write('\n-----------------------------------\n')
                    astd.write(
                        'Stimela version     : {}\n'.format(version))
                    astd.write(
                        'Cab name            : {}\n'.format(job.image))
                    astd.write('-------------------------------------\n')
                job.run_job()

                self.log2recipe(job, recipe, step, 'completed')
                self.completed.append(job)

                finished_time = datetime.now()
                job.log.info('job complete at {} after {}'.format(finished_time, finished_time-start_time),
                              # the extra attributes are filtered by e.g. the CARACal logger
                              extra=dict(stimela_job_state=(job.name, "complete")))

            except (utils.StimelaCabRuntimeError,
                    StimelaRecipeExecutionError,
                    StimelaCabParameterError) as e:
                self.remaining = [jb[1] for jb in jobs[i+1:]]
                self.failed = job

                finished_time = datetime.now()
                job.log.error(str(e), extra=dict(stimela_job_state=(job.name, "failed"), boldface=True))
                job.log.error('job failed at {} after {}'.format(finished_time, finished_time-start_time),
                                extra=dict(stimela_job_state=(job.name, "failed"), color=None))
                for line in traceback.format_exc().splitlines():
                    job.log.error(line, extra=dict(traceback_report=True))

                self.log.info('Completed jobs : {}'.format(
                    [c.name for c in self.completed]))
                self.log.info('Remaining jobs : {}'.format(
                    [c.name for c in self.remaining]))

                self.log2recipe(job, recipe, step, 'failed')
                for pending_step, jb in jobs[i+1:]:
                    self.log.info(
                        'Logging remaining task: {}'.format(jb.label))
                    self.log2recipe(jb, recipe, pending_step, 'remaining')

                self.log.info(
                    'Saving pipeline information in {}'.format(self.resume_file))
                utils.writeJson(self.resume_file, recipe)

                # raise pipeline exception. Original exception context is discarded by "from None" (since we've already
                # logged it above, we don't need to include it with the new exception)
                raise PipelineException(e, self.completed, job, self.remaining) from None

        self.log.info(
            'Saving pipeline information in {}'.format(self.resume_file))
        utils.writeJson(self.resume_file, recipe)
        self.log.info('Recipe executed successfully')

        return 0
Пример #4
0
    def run(self, steps=None, resume=False, redo=None):
        """
        Run a Stimela recipe.

        steps   :   recipe steps to run, either 1-based step numbers or job labels
                    (the part of the label before '::'). A value without __iter__
                    (e.g. None) selects every job; an empty selection runs nothing
        resume  :   resume recipe from last run (read from self.resume_file)
        redo    :   Re-run an old recipe from a .last file

        Returns 0 when all selected jobs finish. Raises StimelaRecipeExecutionError
        when the recipe cannot be recreated or resumed, StimelaCabParameterError
        for an unknown step label, PipelineException when a job fails with a
        Stimela error, and RuntimeError for any other Exception raised by a job.
        When resuming and nothing is left to run, the process exits via sys.exit(0).
        """

        recipe = {
            "name":   self.name,
            "steps":   []
        }

        if redo:
            recipe = utils.readJson(redo)
            self.log.info('Rerunning recipe {0} from {1}'.format(
                recipe['name'], redo))
            self.log.info('Recreating recipe instance..')
            self.jobs = []
            for step in recipe['steps']:

                #        add I/O folders to the json file
                #        add a string describing the contents of these folders
                #        The user has to ensure that these folders exist, and have the required content
                if step['jtype'] == 'docker':
                    self.log.info('Adding job \'{0}\' to recipe. The container will be named \'{1}\''.format(
                        step['cab'], step['name']))
                    cont = docker.Container(step['cab'], step['name'],
                                            label=step['label'], logger=self.log,
                                            shared_memory=step['shared_memory'])

                    self.log.debug('Adding volumes {0} and environmental variables {1}'.format(
                        step['volumes'], step['environs']))
                    cont.volumes = step['volumes']
                    cont.environs = step['environs']
                    cont.shared_memory = step['shared_memory']
                    cont.input_content = step['input_content']
                    cont.msdir_content = step['msdir_content']
                    cont.logfile = step['logfile']
                    job = StimelaJob(
                        step['name'], recipe=self, label=step['label'])
                    job.job = cont
                    job.jtype = 'docker'

                elif step['jtype'] == 'function':
                    name = step['name']
                    # the function is looked up by name among the caller's local variables
                    func = inspect.currentframe(
                    ).f_back.f_locals[step['function']]
                    job = StimelaJob(name, recipe=self, label=step['label'])
                    job.python_job(func, step['parameters'])
                    job.jtype = 'function'

                else:
                    # without this branch `job` would be unbound on the first step,
                    # or the previous step's job would be appended a second time
                    raise StimelaRecipeExecutionError(
                        "Cannot recreate recipe, unsupported job type '{0}'".format(step['jtype']))

                self.jobs.append(job)

        elif resume:
            self.log.info("Resuming recipe from last run.")
            try:
                recipe = utils.readJson(self.resume_file)
            except IOError:
                raise StimelaRecipeExecutionError(
                    "Cannot resume pipeline, resume file '{}' not found".format(self.resume_file))

            steps_ = recipe.pop('steps')
            recipe['steps'] = []
            _steps = []
            for step in steps_:
                if step['status'] == 'completed':
                    recipe['steps'].append(step)
                    continue

                label = step['label']
                number = step['number']

                # Check if the recipe flow has changed. A step number outside the
                # current job list also means the flow changed.
                if 1 <= number <= len(self.jobs) and label == self.jobs[number-1].label:
                    self.log.info(
                        'recipe step \'{0}\' is fit for re-execution. Label = {1}'.format(number, label))
                    _steps.append(number)
                else:
                    raise StimelaRecipeExecutionError(
                        'Recipe flow, or task scheduling has changed. Cannot resume recipe. Label = {0}'.format(label))

            # Check whether there are steps to resume
            if len(_steps) == 0:
                self.log.info(
                    'All the steps were completed. No steps to resume')
                sys.exit(0)
            steps = _steps

        if getattr(steps, '__iter__', False):
            # materialise once so the selection can be indexed and iterated safely
            steps = list(steps)
            # guard against an empty selection, which has no first element to inspect
            if steps and isinstance(steps[0], str):
                labels = [job.label.split('::')[0] for job in self.jobs]
                _steps = []
                for step in steps:
                    try:
                        _steps.append(labels.index(step)+1)
                    except ValueError:
                        raise StimelaCabParameterError(
                            'Recipe label ID [{0}] doesn\'t exist'.format(step))
                steps = _steps
        else:
            steps = range(1, len(self.jobs)+1)

        jobs = [(step, self.jobs[step-1]) for step in steps]

        for i, (step, job) in enumerate(jobs):

            self.log.info('Running job {}'.format(job.name))
            self.log.info('STEP {0} :: {1}'.format(i+1, job.label))
            self.active = job
            try:
                if job.jtype == 'function':
                    job.run_python_job()
                elif job.jtype in ['docker', 'singularity', 'udocker', 'podman']:
                    with open(job.job.logfile, 'a') as astd:
                        astd.write('\n-----------------------------------\n')
                        astd.write(
                            'Stimela version     : {}\n'.format(version))
                        astd.write(
                            'Cab name            : {}\n'.format(job.job.image))
                        astd.write('-------------------------------------\n')

                    # dispatch to run_docker_job / run_singularity_job / ...
                    run_job = getattr(job, "run_{0:s}_job".format(job.jtype))
                    run_job()

                self.log2recipe(job, recipe, step, 'completed')

            except (utils.StimelaCabRuntimeError,
                    StimelaRecipeExecutionError,
                    StimelaCabParameterError) as e:
                self.completed = [jb[1] for jb in jobs[:i]]
                self.remaining = [jb[1] for jb in jobs[i+1:]]
                self.failed = job

                self.log.info(
                    'Recipe execution failed while running job {}'.format(job.name))
                self.log.info('Completed jobs : {}'.format(
                    [c.name for c in self.completed]))
                self.log.info('Remaining jobs : {}'.format(
                    [c.name for c in self.remaining]))

                self.log2recipe(job, recipe, step, 'failed')
                for pending_step, jb in jobs[i+1:]:
                    self.log.info(
                        'Logging remaining task: {}'.format(jb.label))
                    self.log2recipe(jb, recipe, pending_step, 'remaining')

                self.log.info(
                    'Saving pipeline information in {}'.format(self.resume_file))
                utils.writeJson(self.resume_file, recipe)

                # re-raise as PipelineException while keeping the original traceback
                pe = PipelineException(e, self.completed, job, self.remaining)
                raise_(pe, None, sys.exc_info()[2])
            except Exception:
                # Only real errors are reported as bugs; KeyboardInterrupt and
                # SystemExit propagate unchanged so the run can be interrupted.
                import traceback
                traceback.print_exc()
                raise RuntimeError(
                    "An unhandled exception has occured. This is a bug, please report")

            finally:
                # singularity jobs that were created are stopped whatever the outcome
                if job.jtype == 'singularity' and job.created:
                    job.job.stop()

        self.log.info(
            'Saving pipeline information in {}'.format(self.resume_file))
        utils.writeJson(self.resume_file, recipe)

        self.log.info('Recipe executed successfully')

        return 0
Пример #5
0
    def update(self, options, saveconf):
        """
        Validate the given options against the cab parameters and save them.

        options   :   mapping of parameter names (or their mapping aliases) to values
        saveconf  :   destination handed to utils.writeJson for the validated configuration

        Raises RuntimeError when a required parameter is missing or an option
        matches no parameter, and IOError when an io location is unknown, its
        folder is not set, or a checked input file does not exist.
        """
        # Every required parameter must be supplied under its name or its mapping.
        # (The previous test, `a in keys == False and ...`, was a chained
        # comparison that is always False, so this check never fired.)
        for param0 in self.parameters:
            if not param0.required:
                continue
            if param0.name not in options and param0.mapping not in options:
                raise RuntimeError("Parameter {} is required but has not been specified".format(param0.name))

        self.log.info("Validating parameters...       CAB = {0}".format(self.task))
        for name, value in options.items():
            found = False
            for param in self.parameters:
                if name not in (param.name, param.mapping):
                    continue
                found = True

                if not param.io:
                    self.log.debug("Validating paramter {}".format(param.name))
                    param.validate(value)
                    param.value = value
                    continue

                # io parameter: an explicit None leaves the parameter as it is
                if value is None:
                    continue
                param.validate(value)
                param.value = []
                # wrap a single value (strings included) so it is handled as a list
                if not hasattr(value, "__iter__") or isinstance(value, str):
                    value = [value]

                for _value in value:
                    # "<file>:<location>" overrides the parameter's default location
                    val = _value.split(":")
                    if len(val) == 2:
                        if val[1] not in IODEST.keys():
                            raise IOError('The location \'{0}\' specified for parameter \'{1}\', is unknown. Choices are {2}'.format(val[1], param.name, IODEST.keys()))
                        self.log.info("Location of '{0}' was specified as '{1}'. Will overide default.".format(param.name, val[1]))
                        _value = val[0]
                        location = val[1]
                    else:
                        location = param.io

                    if location in ["input", "msfile"]:
                        if location == "input" and self.indir is None:
                            raise IOError("You have specified input files, but have not specified an input folder")
                        if location == "msfile" and self.msdir is None:
                            raise IOError("You have specified MS files, but have not specified an MS folder")

                        # existence is checked against the indir/msdir folder,
                        # while the stored value uses the IODEST entry
                        path = "{0}/{1}".format(self.indir if location == "input" else self.msdir, _value)
                        if param.check_io and not os.path.exists(path):
                            raise IOError("File '{0}' for parameter '{1}' could not be located at '{2}'.".format(_value, param.name, path))
                    elif self.outdir is None:
                        raise IOError("You have specified output files, but have not specified an output folder")

                    param.value.append("{0}/{1}".format(IODEST[location], _value))

                # a single entry is stored as a scalar rather than a one-item list
                if len(param.value) == 1:
                    param.value = param.value[0]

            if not found:
                raise RuntimeError("Parameter {0} is unknown. Run 'stimela cabs -i {1}' to get help on this cab".format(name, self.task))

        conf = {}
        conf.update(self.toDict())
        utils.writeJson(saveconf, conf)
        self.log.info("Parameters validated and saved. Parameter file is: {}".format(saveconf))