def wait_until_dag_validated(self, dag_path):
    """
    Validates the DAG python file, reusing results of the previous checks.

    Reads the md5 sum of the DAG python file to see whether it was updated.
    Searches by md5 sum if exactly the same DAG python file has been already
    validated. If it was already validated raises exception in case of
    "error" result or does nothing if validation was successful. If
    validation check for the specific DAG python file is still running
    ("checking" status), sleeps for 1 second and checks status again. This
    approach prevents from running multiple processes for exactly the same
    DAG python file on each POST request.

    Instead of using "airflow list_dags -sd" that never ends with exit code
    other than 0, we use "python3". Environment is copied to the subprocess,
    so it should work fine even in portable CWL-Airflow installation.

    Raises ValueError if the DAG python file failed validation. If the
    validation subprocess could not be run at all (for example, "python3"
    is not found), the original exception is re-raised after the status is
    set to "error".
    """

    dag_md5_sum = get_md5_sum(dag_path)

    if dag_md5_sum not in self.validated_dags:
        self.validated_dags[dag_md5_sum] = "checking"
        try:
            check_call(
                ["python3", dag_path],
                env=environ.copy(),
                stdout=DEVNULL,
                stderr=DEVNULL
            )
        except CalledProcessError:
            # "python3" exited with non-zero code: DAG file is not loadable
            self.validated_dags[dag_md5_sum] = "error"
        except BaseException:
            # Never leave "checking" behind: otherwise every other call for
            # the same md5 sum would be stuck in the waiting loop below
            self.validated_dags[dag_md5_sum] = "error"
            raise
        else:
            self.validated_dags[dag_md5_sum] = "success"

    # Another call may still be validating exactly the same file
    while self.validated_dags[dag_md5_sum] not in ("success", "error"):
        sleep(1)

    if self.validated_dags[dag_md5_sum] == "error":
        raise ValueError(f"Failed to load DAG from {dag_path}")
def fast_cwl_load(workflow, cwl_args=None):
    """
    Loads workflow tool from the pickle cache or, if not possible, from
    the "workflow" file.

    Tries to unpickle workflow from "pickle_folder" based on md5 sum of the
    "workflow" file. "cwl_args" can be used to update default location of
    "pickle_folder" as well as other parameters used by "slow_cwl_load" for
    loading and runtime contexts.

    If pickled file not found or failed to unpickle (including empty,
    truncated or outdated pickled files), load tool from the "workflow"
    using "slow_cwl_load" with "only_tool" set to True to return only tool.
    Returned tool will be pickled into "pickle_folder" with a basename
    generated from md5 sum of the "workflow" file.

    If "workflow" was already parsed into CommentedMap, return it unchanged.
    Nothing will be pickled.
    """

    cwl_args = {} if cwl_args is None else cwl_args

    if isinstance(workflow, CommentedMap):
        return workflow

    default_cwl_args = get_default_cwl_args(cwl_args)

    pickled_workflow = os.path.join(
        default_cwl_args["pickle_folder"],
        get_md5_sum(workflow) + ".p"
    )

    # NOTE(review): pickle.load can execute arbitrary code, so "pickle_folder"
    # must be writable only by trusted users - confirm for your deployment
    try:
        with open(pickled_workflow, "rb") as input_stream:
            workflow_tool = pickle.load(input_stream)
    except (
        FileNotFoundError,
        pickle.UnpicklingError,
        EOFError,          # empty or truncated pickled file
        AttributeError,    # pickled class is missing from its module
        ImportError,       # pickled module is not importable anymore
        IndexError
    ):
        # Cache is missing or unusable: load from the source and refresh it
        workflow_tool = slow_cwl_load(
            workflow=workflow,
            cwl_args=default_cwl_args,
            only_tool=True
        )
        with open(pickled_workflow, "wb") as output_stream:
            pickle.dump(workflow_tool, output_stream)

    return workflow_tool
def test_get_md5_sum(location, control_md5sum):
    """
    Checks that "get_md5_sum" called for "location" returns the value
    equal to "control_md5sum".
    """
    calculated_md5sum = get_md5_sum(location)
    assert control_md5sum == calculated_md5sum, "Failed to calculate md5 sum"