Exemplo n.º 1
0
    def test_create_list_of_paired_datasets_in_history(self):
        """Create a ``list:paired`` collection and verify the returned structure.

        Four datasets are obtained from ``self._test_dataset`` in a new
        history and grouped into two forward/reverse pairs ("sample1" and
        "sample2"). The response is checked for the collection name and type,
        for two top-level elements, and for the forward member of the first
        pair and the reverse member of the second pair referencing the
        expected dataset ids.
        """
        history_id = self.gi.histories.create_history(
            name="TestDSListCreate")["id"]
        # Created one after another, in the order they are paired below.
        fwd1_id, rev1_id, fwd2_id, rev2_id = [
            self._test_dataset(history_id) for _ in range(4)
        ]

        def make_pair(sample_name, forward_id, reverse_id):
            # One "paired" element holding a forward and a reverse dataset.
            return collections.CollectionElement(
                name=sample_name,
                type="paired",
                elements=[
                    collections.HistoryDatasetElement(name="forward",
                                                      id=forward_id),
                    collections.HistoryDatasetElement(name="reverse",
                                                      id=reverse_id),
                ])

        description = collections.CollectionDescription(
            name="MyListOfPairedDatasets",
            type="list:paired",
            elements=[
                make_pair("sample1", fwd1_id, rev1_id),
                make_pair("sample2", fwd2_id, rev2_id),
            ])
        response = self.gi.histories.create_dataset_collection(
            history_id=history_id,
            collection_description=description)

        # Top-level collection.
        self.assertEqual(response["name"], "MyListOfPairedDatasets")
        self.assertEqual(response["collection_type"], "list:paired")
        top_level = response["elements"]
        self.assertEqual(len(top_level), 2)

        # First pair: position, shape and forward member.
        self.assertEqual(top_level[0]["element_index"], 0)
        first_pair = top_level[0]["object"]
        self.assertEqual(first_pair["collection_type"], "paired")
        self.assertEqual(len(first_pair["elements"]), 2)
        first_forward = first_pair["elements"][0]
        self.assertEqual(first_forward["element_identifier"], "forward")
        self.assertEqual(first_forward["element_index"], 0)
        self.assertEqual(first_forward["object"]["id"], fwd1_id)

        # Second pair: position, shape and reverse member.
        self.assertEqual(top_level[1]["element_index"], 1)
        second_pair = top_level[1]["object"]
        self.assertEqual(second_pair["collection_type"], "paired")
        self.assertEqual(len(second_pair["elements"]), 2)
        second_reverse = second_pair["elements"][1]
        second_reverse_dataset = second_reverse["object"]

        self.assertEqual(second_reverse["element_identifier"], "reverse")
        self.assertEqual(second_reverse["element_index"], 1)
        self.assertEqual(second_reverse_dataset["id"], rev2_id)
Exemplo n.º 2
0
    def build_list(self):
        """
        Builds a "list:paired" dataset collection from the fastq datasets in the history.

        Fetches the contents of ``self.history_id``, keeps the datasets whose extension is
        "fastq", sorts them into R1 and R2 reads by the ``_R1`` / ``_R2`` marker in their name
        and pairs reads that share the same name prefix. A warning is printed when the number
        of R1 and R2 files differs; files without a mate are left out of the collection.
        The collection is created in the same history under the name "pair_list".

        Both Galaxy calls are retried, after ``self.wait_for_problem()``, for as long as they
        raise a connection error.
        :return: None
        """
        while True:
            try:
                contents = self.gi.histories.show_history(self.history_id, contents=True)
                break
            except (ConnectionError, requests.exceptions.ConnectionError):
                self.wait_for_problem()

        # galaxy items for all the fastq datasets that are found
        fastqs = [item for item in contents
                  if item["history_content_type"] == "dataset" and item["extension"] == "fastq"]

        # split into r1 and r2 files; each item's name is rewritten in place to the prefix
        # before the read marker so that mates end up with equal names
        r1s = []
        r2s = []
        for fastq in fastqs:
            result1 = re.findall(r"(.+)_[Rr]1", fastq["name"])
            result2 = re.findall(r"(.+)_[Rr]2", fastq["name"])
            if result1:
                fastq["name"] = result1[0]
                r1s.append(fastq)
            if result2:
                fastq["name"] = result2[0]
                r2s.append(fastq)

        if len(r1s) != len(r2s):
            self.t.time_print("[WARNING] There are different amounts of R1 and R2 files,"
                              " will only use ones that can be paired.")

        # index the r2 files by name once instead of rescanning the whole list for every r1;
        # setdefault keeps the first r2 seen for a name, which is the one a linear scan picks
        first_r2_by_name = {}
        for reverse in r2s:
            first_r2_by_name.setdefault(reverse["name"], reverse)

        # create collection elements with the pairs of r1 and r2 files that were found
        pairs = []
        paired_names = set()
        for forward in r1s:
            sample_name = forward["name"]
            reverse = first_r2_by_name.get(sample_name)
            # skip r1 files without a mate, and names that have already been paired
            if reverse is None or sample_name in paired_names:
                continue
            paired_names.add(sample_name)
            elements = [
                collections.HistoryDatasetElement(name="forward", id=forward["id"]),
                collections.HistoryDatasetElement(name="reverse", id=reverse["id"])
            ]
            pairs.append(collections.CollectionElement(sample_name, type="paired", elements=elements))

        collection_description = collections.CollectionDescription("pair_list", type="list:paired", elements=pairs)
        while True:
            try:
                self.gi.histories.create_dataset_collection(self.history_id, collection_description)
                break
            except (ConnectionError, requests.exceptions.ConnectionError):
                self.wait_for_problem()
Exemplo n.º 3
0
    def create_dataset_collection(self, gi, outputhist, name="DatasetList"):
        """
        Make a dataset collection with the datasets listed in self.dataset_collection

        For a 'list' collection every imported dataset becomes one element.
        For a 'list:paired' collection the imported datasets are taken two at
        a time as forward/reverse, and each pair is named after its forward
        dataset.

        Args:
            gi (GalaxyInstance): The current instance of Galaxy being used
            outputhist (History): The history in which to create the dataset collection
            name (str): The name of the new dataset collection
        Returns:
            dataset_collection (HistoryDatasetCollectionAssociation): The new dataset collection object
        Raises:
            ValueError: If the collection type is not 'list' or 'list:paired',
                or if a 'list:paired' collection has an odd number of datasets.
        """
        self.logger.info("Dataset collection name: '%s'", name)
        datasets = self.import_datasets('dataset_collection', gi, outputhist)
        collection_type = self.dataset_collection['type']
        if collection_type == 'list':
            collection_elements = [
                collections.HistoryDatasetElement(name=dataset.name,
                                                  id=dataset.id)
                for dataset in datasets
            ]
        elif collection_type == 'list:paired':
            # Every forward dataset needs a reverse mate; fail with a clear
            # message instead of an IndexError on the last, unpaired dataset.
            if len(datasets) % 2:
                message = (
                    "Dataset collection type 'list:paired' requires an even "
                    "number of datasets, got %d" % len(datasets))
                self.logger.error(message)
                raise ValueError(message)
            collection_elements = []
            for i in range(0, len(datasets), 2):
                collection_elements.append(
                    collections.CollectionElement(
                        name=datasets[i].name,
                        type='paired',
                        elements=[
                            collections.HistoryDatasetElement(
                                name='forward', id=datasets[i].id),
                            collections.HistoryDatasetElement(
                                name='reverse', id=datasets[i + 1].id),
                        ]))
        else:
            self.logger.error(
                "Dataset collection type must be 'list' or 'list:paired'")
            raise ValueError(
                "Dataset collection type must be 'list' or 'list:paired'")
        collection_description = collections.CollectionDescription(
            name=name,
            type=collection_type,
            elements=collection_elements)
        dataset_collection = outputhist.create_dataset_collection(
            collection_description)
        return dataset_collection
Exemplo n.º 4
0
    def build_list(self):
        """
        Create a "list:paired" dataset collection from the history's fastq datasets.

        The contents of ``self.history_id`` are fetched, datasets with the
        "fastq" extension are sorted into R1 and R2 reads by the ``_R1`` /
        ``_R2`` marker in their name, and reads sharing the same name prefix
        are paired as forward/reverse. A warning is printed when the R1 and
        R2 counts differ; files without a mate are left out. The collection
        is created in the same history under the name "pair_list".

        Both Galaxy calls are retried, after ``self.wait_for_problem()``, for
        as long as they raise a connection error.
        """
        while True:
            try:
                contents = self.gi.histories.show_history(self.history_id,
                                                          contents=True)
                break
            except (ConnectionError, requests.exceptions.ConnectionError):
                self.wait_for_problem()

        # get fastq files
        fastqs = [
            item for item in contents
            if item["history_content_type"] == "dataset"
            and item["extension"] == "fastq"
        ]

        # Split into R1 and R2 reads. Each item's name is rewritten in place
        # to the prefix before the read marker so that mates compare equal.
        r1s = []
        r2s = []
        for fastq in fastqs:
            result1 = re.findall(r"(.+)_[Rr]1", fastq["name"])
            result2 = re.findall(r"(.+)_[Rr]2", fastq["name"])
            if result1:
                fastq["name"] = result1[0]
                r1s.append(fastq)
            if result2:
                fastq["name"] = result2[0]
                r2s.append(fastq)

        if len(r1s) != len(r2s):
            self.t.time_print(
                "[WARNING] There are different amounts of R1 and R2 files,"
                " will only use ones that can be paired.")

        # Index R2 reads by name once rather than rescanning the list for
        # every R1; setdefault keeps the first R2 seen for a name, which is
        # the one a linear scan would have picked.
        first_r2_by_name = {}
        for reverse in r2s:
            first_r2_by_name.setdefault(reverse["name"], reverse)

        pairs = []
        paired_names = set()
        for forward in r1s:
            sample_name = forward["name"]
            reverse = first_r2_by_name.get(sample_name)
            # Skip R1 reads without a mate and names that are already paired.
            if reverse is None or sample_name in paired_names:
                continue
            paired_names.add(sample_name)
            elements = [
                collections.HistoryDatasetElement(name="forward",
                                                  id=forward["id"]),
                collections.HistoryDatasetElement(name="reverse",
                                                  id=reverse["id"])
            ]
            pairs.append(
                collections.CollectionElement(sample_name,
                                              type="paired",
                                              elements=elements))

        collection_description = collections.CollectionDescription(
            "pair_list", type="list:paired", elements=pairs)
        while True:
            try:
                self.gi.histories.create_dataset_collection(
                    self.history_id, collection_description)
                break
            except (ConnectionError, requests.exceptions.ConnectionError):
                self.wait_for_problem()