Exemplo n.º 1
0
    def execute(self, context):
        """Dispatch the job description found in the DAG run configuration.

        Reports status through ``post_status``, reads the optional ``job``
        entry from ``context['dag_run'].conf`` and hands it to
        ``self.cwl_dispatch``. An empty dict is dispatched when no ``job``
        entry is present.

        Args:
            context: Task context; must provide ``context['dag_run']`` with
                a ``conf`` attribute.

        Returns:
            The truthy value returned by ``self.cwl_dispatch``.

        Raises:
            Exception: If ``self.cwl_dispatch`` returns a falsy value.
        """
        post_status(context)

        # ``conf`` can be None when the run was started without any
        # configuration; normalise it so the membership test below cannot
        # fail with "argument of type 'NoneType' is not iterable".
        conf = context['dag_run'].conf or {}

        _json = {}
        if 'job' in conf:
            logging.debug('{0}: dag_run conf: \n {1}'.format(
                self.task_id, conf['job']))
            _json = conf['job']

        cwl_context = self.cwl_dispatch(_json)
        if not cwl_context:
            raise Exception("No cwl context")
        return cwl_context
Exemplo n.º 2
0
    def execute(self, context):
        """Run the CWL step bound to this task and return its outputs.

        The method:

        1. loads the CWL document named by
           ``self.dag.default_args["cwl_workflow"]`` and selects the step whose
           id fragment equals ``self.task_id`` (or the document itself when it
           is not a workflow);
        2. pulls ``promises`` (and an optional ``outdir``) published by the
           upstream tasks and the reader task via ``xcom_pull``;
        3. builds the job object for the step from those promises, falling
           back to ``valueFrom`` placeholders and ``default`` values;
        4. evaluates ``valueFrom`` expressions and runs the step with
           ``SingleJobExecutor``;
        5. returns the step outputs keyed by their short output id.

        NOTE: ``_d_args`` is the same object as ``self.dag.default_args``, so
        the ``outdir``/``tmpdir_prefix``/``cidfile_*`` keys set below are
        written into the DAG's ``default_args``.

        Args:
            context: Task context; forwarded to ``post_status`` and
                ``self.xcom_pull``.

        Returns:
            dict: ``{"promises": <outputs by short id>, "outdir": self.outdir}``.

        Raises:
            ValueError: If the executor produced no output and reported the
                status ``"permanentFail"``.
        """
        post_status(context)

        self.cwlwf, it_is_workflow = load_cwl(
            self.dag.default_args["cwl_workflow"], self.dag.default_args)
        # For a workflow pick the step matching this task id; for a single
        # tool the loaded document itself plays the role of the step.
        self.cwl_step = [
            step for step in self.cwlwf.steps
            if self.task_id == step.id.split("#")[-1]
        ][0] if it_is_workflow else self.cwlwf

        _logger.info('{0}: Running!'.format(self.task_id))

        upstream_task_ids = [t.task_id for t in self.upstream_list] + \
                            ([self.reader_task_id] if self.reader_task_id else [])
        _logger.debug('{0}: Collecting outputs from: \n{1}'.format(
            self.task_id, json.dumps(upstream_task_ids, indent=4)))

        upstream_data = self.xcom_pull(context=context,
                                       task_ids=upstream_task_ids)
        _logger.info('{0}: Upstream data: \n {1}'.format(
            self.task_id, json.dumps(upstream_data, indent=4)))

        # Merge everything the upstream tasks published; the last upstream
        # entry carrying an "outdir" wins.
        promises = {}
        for data in upstream_data:  # upstream_data is an array with { promises and outdir }
            promises = merge(promises, data["promises"])
            if "outdir" in data:
                self.outdir = data["outdir"]

        _d_args = self.dag.default_args

        if not self.outdir:
            self.outdir = _d_args['tmp_folder']

        _logger.debug('{0}: Step inputs: {1}'.format(
            self.task_id, json.dumps(self.cwl_step.tool["inputs"], indent=4)))

        _logger.debug('{0}: Step outputs: {1}'.format(
            self.task_id, json.dumps(self.cwl_step.tool["outputs"], indent=4)))

        jobobj = {}

        for inp in self.cwl_step.tool["inputs"]:
            jobobj_id = shortname(inp["id"]).split("/")[-1]
            source_ids = []
            promises_outputs = []
            try:
                # Workflow steps name their upstream outputs in "source";
                # a standalone tool is fed by its own input id.
                source_field = inp["source"] if it_is_workflow else inp.get(
                    "id")
                source_ids = [shortname(s)
                              for s in source_field] if isinstance(
                                  source_field,
                                  list) else [shortname(source_field)]
                promises_outputs = [
                    promises[source_id] for source_id in source_ids
                    if source_id in promises
                ]
            except Exception:
                # Best effort: an input without a usable source falls through
                # to the valueFrom/default handling below. Only Exception is
                # caught so KeyboardInterrupt/SystemExit still propagate.
                _logger.warning(
                    "{0}: Couldn't find source field in step input: {1}".
                    format(self.task_id, json.dumps(inp, indent=4)))

            _logger.info(
                '{0}: For input {1} with source_ids: {2} found upstream outputs: \n{3}'
                .format(self.task_id, jobobj_id, source_ids, promises_outputs))

            if len(promises_outputs) > 1:
                if inp.get("linkMerge", "merge_nested") == "merge_flattened":
                    jobobj[jobobj_id] = flatten(promises_outputs)
                else:
                    jobobj[jobobj_id] = promises_outputs
            # Should also check if [None], because in this case we need to take default value
            elif len(promises_outputs) == 1 and (promises_outputs[0]
                                                 is not None):
                jobobj[jobobj_id] = promises_outputs[0]
            elif "valueFrom" in inp:
                # Placeholder; the real value is computed by
                # _post_scatter_eval below.
                jobobj[jobobj_id] = None
            elif "default" in inp:
                d = copy.copy(inp["default"])
                jobobj[jobobj_id] = d
            else:
                continue

        _logger.debug('{0}: Collected job object: \n {1}'.format(
            self.task_id, json.dumps(jobobj, indent=4)))

        def _post_scatter_eval(shortio, cwl_step):
            """Return a copy of ``shortio`` with ``valueFrom`` inputs evaluated."""
            _value_from = {
                shortname(i["id"]).split("/")[-1]: i["valueFrom"]
                for i in cwl_step.tool["inputs"] if "valueFrom" in i
            }
            _logger.debug('{0}: Step inputs with valueFrom: \n{1}'.format(
                self.task_id, json.dumps(_value_from, indent=4)))

            def value_from_func(k, v):
                if k in _value_from:
                    return expression.do_eval(_value_from[k],
                                              shortio,
                                              self.cwlwf.tool.get(
                                                  "requirements", []),
                                              None,
                                              None, {},
                                              context=v)
                else:
                    return v

            return {k: value_from_func(k, v) for k, v in shortio.items()}

        job = _post_scatter_eval(jobobj, self.cwl_step)
        _logger.info('{0}: Final job data: \n {1}'.format(
            self.task_id, json.dumps(job, indent=4)))

        _d_args['outdir'] = tempfile.mkdtemp(
            prefix=os.path.join(self.outdir, "step_tmp"))
        _d_args['tmpdir_prefix'] = os.path.join(_d_args['outdir'], 'cwl_tmp_')
        _d_args['tmp_outdir_prefix'] = os.path.join(_d_args['outdir'],
                                                    'cwl_outdir_')

        _d_args["record_container_id"] = True
        _d_args["cidfile_dir"] = _d_args['outdir']
        _d_args["cidfile_prefix"] = self.task_id

        _logger.debug('{0}: Runtime context: \n {1}'.format(self, _d_args))

        executor = SingleJobExecutor()
        runtimeContext = RuntimeContext(_d_args)
        runtimeContext.make_fs_access = getdefault(
            runtimeContext.make_fs_access, StdFsAccess)

        for inp in self.cwl_step.tool["inputs"]:
            if inp.get("not_connected"):
                # A not-connected input that never received a value is not in
                # ``job`` at all, so remove it only if present.
                job.pop(shortname(inp["id"].split("/")[-1]), None)

        # Run the tool with the interpreter's original stderr and always put
        # the previous stream back, even when the executor raises.
        _stderr = sys.stderr
        sys.stderr = sys.__stderr__
        try:
            (output, status) = executor(
                self.cwl_step.embedded_tool if it_is_workflow else self.cwl_step,
                job,
                runtimeContext,
                logger=_logger)
        finally:
            sys.stderr = _stderr

        if not output and status == "permanentFail":
            raise ValueError(
                "{0}: step finished with status permanentFail and no output".
                format(self.task_id))

        _logger.debug('{0}: Embedded tool outputs: \n {1}'.format(
            self.task_id, json.dumps(output, indent=4)))

        promises = {}

        for out in self.cwl_step.tool["outputs"]:

            out_id = shortname(out["id"])
            jobout_id = out_id.split("/")[-1]
            try:
                promises[out_id] = output[jobout_id]
            except (KeyError, TypeError):
                # Output not produced (missing key) or no output object at
                # all (None): skip it, as before.
                continue

        # Unsetting the Generation from final output object
        visit_class(promises, ("File", ), MutationManager().unset_generation)

        data = {"promises": promises, "outdir": self.outdir}

        _logger.info('{0}: Output: \n {1}'.format(self.task_id,
                                                  json.dumps(data, indent=4)))

        return data
Exemplo n.º 3
0
 def execute(self, context):
     """Report status for this task, then return what ``cwl_gather`` yields.

     Args:
         context: Task context, passed unchanged to ``post_status`` and
             ``self.cwl_gather``.

     Returns:
         Whatever ``self.cwl_gather(context)`` returns.
     """
     post_status(context)
     gathered = self.cwl_gather(context)
     return gathered