示例#1
0
def test_output_properties_in_memory():
    props = output_properties(content=b"hello world", basename="hello.txt")
    assert props["basename"] == "hello.txt"
    assert props["nameroot"] == "hello"
    assert props["nameext"] == ".txt"
    assert props["size"] == 11
    assert props["checksum"] == "sha1$2aae6c35c94fcfb415dbe95f408b9ce91ee846ed"
示例#2
0
def test_output_properties_path():
    f = tempfile.NamedTemporaryFile(mode="w")
    f.write("hello world")
    f.flush()

    props = output_properties(path=f.name, basename="hello.txt")
    assert props["basename"] == "hello.txt"
    assert props["nameroot"] == "hello"
    assert props["nameext"] == ".txt"
    assert props["size"] == 11
    assert props["checksum"] == "sha1$2aae6c35c94fcfb415dbe95f408b9ce91ee846ed"
示例#3
0
    def collect_outputs(self, ctx, output_directory):
        assert self._outputs_dict is None, "collect_outputs pre-condition violated"

        outputs_dict = {}
        if not output_directory:
            # TODO: rather than creating a directory just use
            # Galaxy paths if they are available in this
            # configuration.
            output_directory = tempfile.mkdtemp()

        def get_dataset(dataset_details, filename=None):
            parent_basename = dataset_details.get("cwl_file_name")
            if not parent_basename:
                parent_basename = dataset_details.get("name")
            file_ext = dataset_details["file_ext"]
            if file_ext == "directory":
                # TODO: rename output_directory to outputs_directory because we can have output directories
                # and this is confusing...
                the_output_directory = os.path.join(output_directory, parent_basename)
                safe_makedirs(the_output_directory)
                destination = self.download_output_to(dataset_details, the_output_directory, filename=filename)
            else:
                destination = self.download_output_to(dataset_details, output_directory, filename=filename)
            if filename is None:
                basename = parent_basename
            else:
                basename = os.path.basename(filename)

            return {"path": destination, "basename": basename}

        ctx.vlog("collecting outputs to directory %s" % output_directory)

        for runnable_output in get_outputs(self._runnable, gi=self._user_gi):
            output_id = runnable_output.get_id()
            if not output_id:
                ctx.vlog("Workflow output identified without an ID (label), skipping")
                continue
            output_dict_value = None
            is_cwl = self._runnable.type in [RunnableType.cwl_workflow, RunnableType.cwl_tool]
            output_src = self.output_src(runnable_output)
            if not is_cwl and output_src["src"] == "hda":
                output_dataset_id = output_src["id"]
                dataset = self._get_metadata("dataset", output_dataset_id)
                dataset_dict = get_dataset(dataset)
                ctx.vlog("populated destination [%s]" % dataset_dict["path"])

                if dataset["file_ext"] == "expression.json":
                    with open(dataset_dict["path"], "r") as f:
                        output_dict_value = json.load(f)
                else:
                    output_dict_value = output_properties(**dataset_dict)
            else:
                output_dataset_id = output_src["id"]
                galaxy_output = self.to_galaxy_output(runnable_output)
                cwl_output = output_to_cwl_json(
                    galaxy_output,
                    self._get_metadata,
                    get_dataset,
                    self._get_extra_files,
                    pseduo_location=True,
                )
                if is_cwl:
                    output_dict_value = cwl_output
                else:

                    def attach_file_properties(collection, cwl_output):
                        elements = collection["elements"]
                        assert len(elements) == len(cwl_output)
                        for element, cwl_output_element in zip(elements, cwl_output):
                            element["_output_object"] = cwl_output_element
                            if isinstance(cwl_output_element, list):
                                assert "elements" in element["object"]
                                attach_file_properties(element["object"], cwl_output_element)

                    output_metadata = self._get_metadata("dataset_collection", output_dataset_id)
                    attach_file_properties(output_metadata, cwl_output)
                    output_dict_value = output_metadata

            outputs_dict[output_id] = output_dict_value

        self._outputs_dict = outputs_dict
        ctx.vlog("collected outputs [%s]" % self._outputs_dict)
    def collect_outputs(self, ctx, output_directory):
        assert self._outputs_dict is None, "collect_outputs pre-condition violated"

        outputs_dict = {}
        if not output_directory:
            # TODO: rather than creating a directory just use
            # Galaxy paths if they are available in this
            # configuration.
            output_directory = tempfile.mkdtemp()

        def get_dataset(dataset_details, filename=None):
            parent_basename = dataset_details.get("cwl_file_name")
            if not parent_basename:
                parent_basename = dataset_details.get("name")
            file_ext = dataset_details["file_ext"]
            if file_ext == "directory":
                # TODO: rename output_directory to outputs_directory because we can have output directories
                # and this is confusing...
                the_output_directory = os.path.join(output_directory, parent_basename)
                safe_makedirs(the_output_directory)
                destination = self.download_output_to(dataset_details, the_output_directory, filename=filename)
            else:
                destination = self.download_output_to(dataset_details, output_directory, filename=filename)
            if filename is None:
                basename = parent_basename
            else:
                basename = os.path.basename(filename)

            return {"path": destination, "basename": basename}

        ctx.vlog("collecting outputs to directory %s" % output_directory)
        for runnable_output in get_outputs(self._runnable):
            output_id = runnable_output.get_id()
            if not output_id:
                ctx.vlog("Workflow output identified without an ID (label), skipping")
                continue
            output_dict_value = None
            if self._runnable.type in [RunnableType.cwl_workflow, RunnableType.cwl_tool]:
                galaxy_output = self.to_galaxy_output(runnable_output)
                cwl_output = output_to_cwl_json(
                    galaxy_output,
                    self._get_metadata,
                    get_dataset,
                    self._get_extra_files,
                    pseduo_location=True,
                )
                output_dict_value = cwl_output
            else:
                # TODO: deprecate this route for finding workflow outputs,
                # it is a brittle and bad approach...
                output_dataset_id = self.output_dataset_id(runnable_output)
                dataset = self._get_metadata("dataset", output_dataset_id)
                dataset_dict = get_dataset(dataset)
                ctx.vlog("populated destination [%s]" % dataset_dict["path"])

                if dataset["file_ext"] == "expression.json":
                    with open(dataset_dict["path"], "r") as f:
                        output_dict_value = json.load(f)
                else:
                    output_dict_value = output_properties(**dataset_dict)

            outputs_dict[output_id] = output_dict_value

        self._outputs_dict = outputs_dict
        ctx.vlog("collected outputs [%s]" % self._outputs_dict)