示例#1
0
    def __init__(self,
                 dag_id=None,
                 cwl_workflow=None,
                 default_args=None,
                 schedule_interval=None,
                 *args,
                 **kwargs):
        """
        Set up the DAG for the CWL workflow given in "cwl_workflow".

        dag_id            - identifier of the DAG. When empty, it is derived
                            from "cwl_workflow": the URL fragment is dropped,
                            the last "/"-separated component is taken, ".cwl"
                            is removed and every remaining "." is replaced
                            with "_dot_".
        cwl_workflow      - location of the workflow; passed to
                            "self.quick_load_cwl" and stored in the default
                            arguments under the "cwl_workflow" key.
        default_args      - optional dict of default arguments. It is copied
                            before use, so the caller's dict is never
                            modified. Its values take priority over the
                            defaults built here, which in turn take priority
                            over those returned by "get_default_args()".
        schedule_interval - forwarded unchanged to the parent constructor.
        *args, **kwargs   - forwarded to the parent constructor.
                            "on_failure_callback" and "on_success_callback"
                            default to "dag_on_failure" and "dag_on_success"
                            when not provided.
        """

        self.top_task = None
        self.bottom_task = None
        self.cwlwf = self.quick_load_cwl(cwl_workflow)

        # keep callbacks supplied by the caller, otherwise use our defaults
        kwargs.setdefault("on_failure_callback", dag_on_failure)
        kwargs.setdefault("on_success_callback", dag_on_success)

        # Work on a private copy: a shared mutable default (or the caller's
        # own dict) must not be updated in place, otherwise values leak
        # between separate DAG instances
        user_default_args = {} if default_args is None else dict(default_args)

        # parameters that cannot be overwritten in default_args: the value
        # returned by conf_get_default wins, the user-provided value is only
        # passed to it as the fallback
        user_default_args.update({
            'singularity':
            conf_get_default('cwl', 'singularity',
                             user_default_args.get('singularity', False)),
            'use_container':
            conf_get_default('cwl', 'use_container',
                             user_default_args.get('use_container', True))
        })

        # "tmp_folder" and "basedir" share the same configured location
        tmp_folder = conf_get_default('cwl', 'tmp_folder', '/tmp')

        init_default_args = {
            'start_date': days_ago(14),
            'email_on_failure': False,
            'email_on_retry': False,
            'end_date': None,
            'tmp_folder': tmp_folder,
            'basedir': tmp_folder,
            'no_match_user': conf_get_default('cwl', 'no_match_user', False),
            'task_retries': conf_get_default('cwl', 'retry', 1),
            'quiet': False,
            'strict': False,
            'on_error': 'continue',
            'skip_schemas': True,
            'cwl_workflow': cwl_workflow
        }

        # priority (lowest to highest): get_default_args() defaults,
        # defaults defined above, user-provided default_args
        init_default_args.update(user_default_args)
        merged_default_args = get_default_args()
        merged_default_args.update(init_default_args)

        # NOTE(review): super(self.__class__, self) recurses infinitely if
        # this class is ever subclassed - consider naming the class
        # explicitly (or a zero-argument super() on Python 3 only)
        super(self.__class__, self).__init__(
            dag_id=dag_id if dag_id else
            urllib.parse.urldefrag(cwl_workflow)[0].split("/")[-1].replace(
                ".cwl", "").replace(".", "_dot_"),
            default_args=merged_default_args,
            schedule_interval=schedule_interval,
            *args,
            **kwargs)
示例#2
0
    def __init__(
            self,
            dag_id=None,
            cwl_workflow=None,
            default_args=None,
            schedule_interval=None,
            *args, **kwargs):
        """
        Set up the DAG for a CWL workflow.

        dag_id            - identifier of the DAG. When empty, it is derived
                            from the workflow location: the URL fragment is
                            dropped, the last "/"-separated component is
                            taken, ".cwl" is removed and every remaining "."
                            is replaced with "_dot_".
        cwl_workflow      - location of the workflow. When empty, the value
                            stored under "cwl_workflow" in "default_args" is
                            used (KeyError if it is missing there as well).
        default_args      - optional dict whose values take priority over the
                            defaults built here, which in turn take priority
                            over those returned by "get_default_args()".
        schedule_interval - forwarded unchanged to the parent constructor.
        *args, **kwargs   - forwarded to the parent constructor.
        """

        self.top_task = None
        self.bottom_task = None
        self.cwlwf = None
        self.requirements = None

        # both "tmp_folder" and "basedir" point to the same location
        scratch_dir = conf_get_default('cwl', 'tmp_folder', '/tmp')

        builtin_defaults = dict(
            start_date=utcnow(),
            email_on_failure=False,
            email_on_retry=False,
            end_date=None,
            tmp_folder=scratch_dir,
            basedir=scratch_dir,
            print_deps=False,
            print_pre=False,
            print_rdf=False,
            print_dot=False,
            relative_deps=False,
            use_container=True,
            rm_container=True,
            enable_pull=True,
            preserve_environment=["PATH"],
            preserve_entire_environment=False,
            print_input_deps=False,
            cachedir=None,
            rm_tmpdir=True,
            move_outputs='move',
            eval_timeout=20,
            quiet=False,
            version=False,
            enable_dev=False,
            enable_ext=False,
            strict=False,
            rdf_serializer=None,
            tool_help=False,
            pack=False,
            on_error='continue',
            relax_path_checks=False,
            validate=False,
            compute_checksum=True,
            skip_schemas=True,
            no_match_user=False,
        )

        # user-provided values win over the defaults listed above ...
        builtin_defaults.update(default_args or {})

        # ... which win over whatever get_default_args() returns
        dag_default_args = get_default_args()
        dag_default_args.update(builtin_defaults)

        # an explicit argument wins, otherwise "cwl_workflow" must have been
        # provided through default_args
        self.cwl_workflow = cwl_workflow or builtin_defaults["cwl_workflow"]

        if dag_id:
            resolved_dag_id = dag_id
        else:
            # drop the URL fragment, keep only the file name and make it
            # free of dots
            workflow_location = urllib.parse.urldefrag(self.cwl_workflow)[0]
            workflow_file = workflow_location.split("/")[-1]
            resolved_dag_id = workflow_file.replace(".cwl", "").replace(
                ".", "_dot_")

        super(self.__class__, self).__init__(
            dag_id=resolved_dag_id,
            default_args=dag_default_args,
            schedule_interval=schedule_interval,
            *args,
            **kwargs)
示例#3
0
def get_default_cwl_args(preset_cwl_args=None):
    """
    Builds the dictionary of arguments required by cwltool's functions,
    extended and partially overwritten with the parameters CWL-Airflow
    needs. Values from "preset_cwl_args" (if provided) replace the cwltool
    defaults, and any extra fields found there are kept in the result.
    "preset_cwl_args" itself is deep-copied and never modified.
    """

    presets = deepcopy(preset_cwl_args) if preset_cwl_args is not None else {}

    # start from the defaults required by cwltool, then apply user presets
    cwl_args = get_default_args()
    cwl_args.update(presets)

    def configured(option, fallback):
        # Airflow configuration file has the highest priority. If it doesn't
        # include the option, the user preset is used, and only then the
        # built-in default
        return conf_get("cwl", option, presets.get(option, fallback))

    overrides = {}

    # folder locations, each one is additionally passed through get_dir
    for folder, fallback in (
        ("tmp_folder", CWL_TMP_FOLDER),
        # for CWL-Airflow to store outputs if "outputs_folder" is not
        # overwritten in job
        ("outputs_folder", CWL_OUTPUTS_FOLDER),
        # for CWL-Airflow to resolve relative locations for input files if
        # job was loaded from parsed object
        ("inputs_folder", CWL_INPUTS_FOLDER),
        # for CWL-Airflow to store pickled workflows
        ("pickle_folder", CWL_PICKLE_FOLDER),
    ):
        overrides[folder] = get_dir(configured(folder, fallback))

    # plain options that can be set in the configuration file
    for option, fallback in (
        # execute jobs in docker containers
        ("use_container", CWL_USE_CONTAINER),
        # disables passing the current uid to "docker run --user"
        ("no_match_user", CWL_NO_MATCH_USER),
        # it looks like this doesn't influence anything in the latest cwltool
        ("skip_schemas", CWL_SKIP_SCHEMAS),
        ("strict", CWL_STRICT),
        ("quiet", CWL_QUIET),
    ):
        overrides[option] = configured(option, fallback)

    # options that are never read from the configuration file
    for option, fallback in (
        # even if we can set it in "preset_cwl_args" it's better not to change
        ("rm_tmpdir", CWL_RM_TMPDIR),
        # even if we can set it in "preset_cwl_args" it's better not to change
        ("move_outputs", CWL_MOVE_OUTPUTS),
        # fails to run without it when creating workflow from tool.
        # TODO: Ask Peter?
        ("enable_dev", CWL_ENABLE_DEV),
    ):
        overrides[option] = presets.get(option, fallback)

    # apply everything at once, after all the values have been resolved
    cwl_args.update(overrides)

    return cwl_args
示例#4
0
def load_cwl(cwl_file):
    """
    Loads the tool from "cwl_file" through "load.load_tool" and returns
    whatever it produces. Loading context is created from the default
    arguments and is set to use "default_make_tool" and "tool_resolver".
    """

    # start from an empty "loaders" mapping on every call
    load.loaders = {}

    context_args = get_default_args()
    context = cwltool.context.LoadingContext(context_args)
    context.construct_tool_object = default_make_tool
    context.resolver = tool_resolver

    tool = load.load_tool(cwl_file, context)
    return tool