Example #1
def create_dags(suite_data, args, dags_folder=None):
    """
    Iterates over "suite_data" and creates new DAGs. Tries to include
    all tools into the worfklow before sending it to the API server.
    If loaded tool is not Workflow, send it unchanged. It's safe to
    not process errors when we failed to add new DAG. Airflow Scheduler
    will parse all dags at the end of the next "dag_dir_list_interval"
    from airflow.cfg. If args.embed was True, send base64 encoded gzip
    compressed content of the workflow file instead of attaching it.
    In case we failed to load and parse worklfow, sets "finished" to
    True and writes reason to "error".
    """

    # TODO: Do we need to force scheduler to reload DAGs after all DAG added?

    for test_data in suite_data.values():
        params = {"dag_id": test_data["dag_id"]}
        workflow_path = os.path.join(
            args.tmp,
            os.path.basename(test_data["tool"])
        )
        try:
            embed_all_runs(                                                                           # will save results to "workflow_path"
                workflow_tool=fast_cwl_load(test_data["tool"]),
                location=workflow_path
            )
        except Exception as ex:
            logging.error(f"Failed to load test case {test_data['tool']}: {ex}")
            test_data.update({
                "error": "Failed to load test case",
                "finished": True
            })
            continue

        with open(workflow_path, "rb") as input_stream:
            logging.info(f"Add DAG {test_data['dag_id']} from test case {test_data['index']}")

            if args.embed:                                                                            # send base64 encoded gzip compressed workflow content that will be embedded into DAG python file
                logging.info(f"Sending base64 encoded gzip compressed content from {workflow_path}")
                r = requests.post(
                    url=urljoin(args.api, "/api/experimental/dags"),
                    params=params,
                    json={"workflow_content": get_compressed(input_stream)}
                )
            else:                                                                                     # attach workflow as a file
                logging.info(f"Attaching workflow file {workflow_path}")
                r = requests.post(
                    url=urljoin(args.api, "/api/experimental/dags"),
                    params=params,
                    files={"workflow": input_stream}
                )

            # Check if adding the new DAG failed. One reason to fail is that the DAG
            # has already been added; it's safe to ignore this error. More serious
            # problems will be caught at the "trigger_dags" step.

            if not r.ok:
                reason = get_api_failure_reason(r)
                logging.info(f"Failed to add DAG {test_data['dag_id']} from test case {test_data['index']} due to \n {reason}")
Example #2
    def export_dag(self, dag_id):
        """
        Checks if DAG python file with the same name has been already
        exported. If not, checks if exaclty one of "workflow" and
        "workflow_content" parameters are present in the request. In
        case of "workflow_content" first we need to load a tool from
        it and try to convert it to Workflow (what if it was
        CommandLineTool), then compress it again and write to DAG
        python file. In case of "workflow", first we need to save
        attachment, then try to comvert it to Workflow (the same reason
        as above) and write it to DAG python file.
        """

        dag_path = path.join(DAGS_FOLDER, dag_id + ".py")

        if path.isfile(dag_path):
            raise FileExistsError(f"File {dag_path} already exists")

        if "workflow_content" in (connexion.request.json or []) \
            and "workflow" in connexion.request.files:

            raise ValueError("Only one of the 'workflow' or \
                'workflow_content' parameters can be set")

        if "workflow_content" in (connexion.request.json or []):    # json field might be None, need to take [] as default

            workflow = get_compressed(
                convert_to_workflow(                                # to make sure we are not saving CommandLineTool instead of a Workflow
                    command_line_tool=fast_cwl_load(                # using fast_cwl_load is safe here because we deal with the content of a file
                        connexion.request.json["workflow_content"]
                    )
                )
            )

        elif "workflow" in connexion.request.files:

            workflow = path.join(DAGS_FOLDER, dag_id + ".cwl")
            self.save_attachment("workflow", workflow)
            convert_to_workflow(
                command_line_tool=slow_cwl_load(                    # safer to use slow_cwl_load because of possible confusion with all this renaming. TODO: make it less complicated
                    workflow=workflow,
                    only_tool=True
                ),
                location=workflow
            )

        else:

            raise ValueError("At least one of the 'workflow' or \
                'workflow_content' parameters should be set")

        with open(dag_path, "w") as output_stream:
            output_stream.write(DAG_TEMPLATE.format(workflow, dag_id))

        return {"dag_id": dag_id, "cwl_path": workflow, "dag_path": dag_path}
Example #3
def trigger_dags(suite_data, args):
    """
    Triggers all DAGs from "suite_data". If failed to trigger DAG, updates
    "suite_data" with "error" and sets "finished" to True. In case --combine
    was set, we will call API that will first create the new DAG, then clean
    all previous DAG runs based on the provided run_id and dag_id, then remove
    outdated DAGs for the same workflow (for that dag_id should follow naming
    rule cwlid-commitsha) and only after that trigger the workflow execution.
    If not only --combine but also --embed was provided, send base64 encoded
    gzip compressed content of the workflow file instead of attaching it.
    """

    for run_id, test_data in suite_data.items():
        params = {
            "run_id": run_id,
            "dag_id": test_data["dag_id"],
            "conf": json.dumps({"job": test_data["job"]})
        }
        if args.combine:  # use the API endpoint that combines creating, cleaning, and triggering DAGs
            logging.info(f"Add and trigger DAG {test_data['dag_id']} from test case {test_data['index']} as {run_id}")
            workflow_path = os.path.join(
                args.tmp,
                os.path.basename(test_data["tool"])
            )
            embed_all_runs(                                                                               # will save results to "workflow_path"
                workflow_tool=fast_cwl_load(test_data["tool"]),
                location=workflow_path
            )
            with open(workflow_path, "rb") as input_stream:
                if args.embed:                                                                            # send base64 encoded gzip compressed workflow content that will be embedded into DAG python file
                    logging.info(f"Sending base64 encoded gzip compressed content from {workflow_path}")
                    r = requests.post(
                        url=urljoin(args.api, "/api/experimental/dags/dag_runs"),
                        params=params,
                        json={"workflow_content": get_compressed(input_stream)}
                    )
                else:                                                                                     # attach workflow as a file
                    logging.info(f"Attaching workflow file {workflow_path}")
                    r = requests.post(
                        url=urljoin(args.api, "/api/experimental/dags/dag_runs"),
                        params=params,
                        files={"workflow": input_stream}
                    )
        else:
            logging.info(f"Trigger DAG {test_data['dag_id']} from test case {test_data['index']} as {run_id}")
            r = requests.post(
                url=urljoin(args.api, "/api/experimental/dag_runs"),
                params=params
            )
        if not r.ok:
            reason = get_api_failure_reason(r)
            logging.error(f"Failed to trigger DAG {test_data['dag_id']} from test case {test_data['index']} as {run_id} due to {reason}")
            test_data["error"] = reason
            test_data["finished"] = True
Example #4
    def __init__(
            self,
            dag_id,  # the id of the DAG
            workflow,  # absolute path to the CWL workflow file or a base64 encoded zlib compressed utf-8 string with the workflow file content
            dispatcher=None,  # custom job dispatcher; will be assigned automatically to the same DAG. Defaults to CWLJobDispatcher
            gatherer=None,  # custom job gatherer; will be assigned automatically to the same DAG. Defaults to CWLJobGatherer
            *args,
            **kwargs  # see DAG class for additional parameters
    ):
        """
        Updates kwargs with the required defaults if they were not explicitely provided
        by user. dispatcher and gatherer are set to CWLJobDispatcher() and CWLJobGatherer()
        if those were not provided by user. If user sets his own operators for dispatcher
        and gatherer, "default_args" will not be inherited. User needs to set up proper
        agruments by himself. Also, dag results will not be posted from the custom dispatcher.
        """

        self.workflow = workflow
        self.__setup_params(kwargs)

        super().__init__(dag_id=dag_id, *args, **kwargs)

        self.workflow_tool = fast_cwl_load(                           # keeps only the tool (CommentedMap object)
            workflow=self.workflow,
            cwl_args=kwargs["default_args"]["cwl"]                     # in case the user has overwritten some of the default parameters
        )

        self.dispatcher = CWLJobDispatcher(
            dag=self,                                                  # need dag=self, otherwise the new operator will not get proper default_args
            task_id="CWLJobDispatcher"
        ) if dispatcher is None else dispatcher

        self.gatherer = CWLJobGatherer(
            dag=self,                                                  # need dag=self, otherwise the new operator will not get proper default_args
            task_id="CWLJobGatherer"
        ) if gatherer is None else gatherer

        self.__assemble()
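
A sketch of instantiating CWLDAG directly, roughly what a DAG python file rendered by export_dag from Example #2 would do. The import path, the file locations, and the "cwl" override inside "default_args" are assumptions for illustration only.

from cwl_airflow.extensions.cwldag import CWLDAG                       # assumed import path

dag = CWLDAG(
    dag_id="bam-bedgraph-bigwig",
    workflow="/opt/airflow/dags/bam-bedgraph-bigwig.cwl",               # or a base64 encoded zlib compressed utf-8 content string
    default_args={"cwl": {"tmp_folder": "/tmp/cwl_tmp"}}                # hypothetical override picked up via kwargs["default_args"]["cwl"]
)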