def test_correct_index_location_metazoa(self):
        """ Tests that the files returned actually exist.

        Tests the Metazoa division instead of the main division, using
        Octopus bimaculoides as the organism.
        """
        survey_job = SurveyJob(source_type="TRANSCRIPTOME_INDEX")
        survey_job.save()

        # Attach the division and the organism name to the survey job
        # as key/value pairs before building the surveyor.
        key_value_pair = SurveyJobKeyValue(survey_job=survey_job,
                                           key="ensembl_division",
                                           value="EnsemblMetazoa")
        key_value_pair.save()

        key_value_pair = SurveyJobKeyValue(survey_job=survey_job,
                                           key="organism_name",
                                           value="Octopus bimaculoides")
        key_value_pair.save()

        surveyor = TranscriptomeIndexSurveyor(survey_job)
        files = surveyor.discover_species()[0]

        # urlopen raises if a URL cannot be opened, which fails the
        # test. The context manager closes each response right away so
        # the test doesn't leak open connections.
        for file in files:
            with urllib.request.urlopen(file.source_url):
                pass

        # Make sure the organism object got created by making sure
        # this doesn't raise an exception.
        Organism.objects.get(name="OCTOPUS_BIMACULOIDES")
    def test_single_plant(self):
        """ Tests that the files returned actually exist.

        Tests the Plants division instead of the main division, using
        Arabidopsis thaliana as the organism.
        """
        survey_job = SurveyJob(source_type="TRANSCRIPTOME_INDEX")
        survey_job.save()

        # Attach the division and the organism name to the survey job
        # as key/value pairs before building the surveyor.
        key_value_pair = SurveyJobKeyValue(survey_job=survey_job,
                                           key="ensembl_division",
                                           value="EnsemblPlants")
        key_value_pair.save()

        key_value_pair = SurveyJobKeyValue(survey_job=survey_job,
                                           key="organism_name",
                                           value="Arabidopsis thaliana")
        key_value_pair.save()

        surveyor = TranscriptomeIndexSurveyor(survey_job)
        files = surveyor.discover_species()[0]

        # urlopen raises if a URL cannot be opened, which fails the
        # test. The context manager closes each response right away so
        # the test doesn't leak open connections.
        for file in files:
            with urllib.request.urlopen(file.source_url):
                pass

        # Make sure the organism object got created by making sure
        # this doesn't raise an exception.
        Organism.objects.get(name="ARABIDOPSIS_THALIANA")
    def test_correct_index_location(self):
        """ Tests that the files returned actually exist.

        Uses an organism in the main division (Danio rerio).
        """
        survey_job = SurveyJob(source_type="TRANSCRIPTOME_INDEX")
        survey_job.save()

        # Attach the division and the organism name to the survey job
        # as key/value pairs before building the surveyor.
        key_value_pair = SurveyJobKeyValue(survey_job=survey_job,
                                           key="ensembl_division",
                                           value="Ensembl")
        key_value_pair.save()

        key_value_pair = SurveyJobKeyValue(survey_job=survey_job,
                                           key="organism_name",
                                           value="Danio rerio")
        key_value_pair.save()

        surveyor = TranscriptomeIndexSurveyor(survey_job)
        files = surveyor.discover_species()[0]

        # Make sure the organism object got created by making sure
        # this doesn't raise an exception.
        Organism.objects.get(name="DANIO_RERIO")

        # urlopen raises if a URL cannot be opened, which fails the
        # test. The context manager closes each response right away so
        # the test doesn't leak open connections.
        for file in files:
            with urllib.request.urlopen(file.source_url):
                pass
    def test_survey_bacteria(self, mock_send_job):
        """ Surveys a single organism from the EnsemblBacteria division.

        Expects exactly one DownloaderJob to exist afterwards, expects
        mock_send_job to have been called with it, and expects the
        Organism to have been stored with taxonomy id 287.
        """
        job = SurveyJob(source_type="TRANSCRIPTOME_INDEX")
        job.save()

        # Division first, then organism name, each saved as its own
        # key/value row on the survey job.
        SurveyJobKeyValue(survey_job=job,
                          key="ensembl_division",
                          value="EnsemblBacteria").save()
        SurveyJobKeyValue(survey_job=job,
                          key="organism_name",
                          value="PSEUDOMONAS_AERUGINOSA").save()

        TranscriptomeIndexSurveyor(job).survey(source_type="TRANSCRIPTOME_INDEX")

        queued_jobs = DownloaderJob.objects.order_by("id").all()
        self.assertEqual(queued_jobs.count(), 1)

        # One expected call per downloader job, in id order.
        expected_calls = [call(Downloaders.TRANSCRIPTOME_INDEX, queued_job)
                          for queued_job in queued_jobs]
        mock_send_job.assert_has_calls(expected_calls)

        # The lookup raises if no organism with this name and taxonomy
        # id exists, which is what fails the test.
        Organism.objects.get(name="PSEUDOMONAS_AERUGINOSA", taxonomy_id=287)
    def test_survey(self, mock_send_job):
        """ Surveys using the survey job stored on the test case.

        Expects more than 50 DownloaderJobs to exist afterwards and
        mock_send_job to have been called once for each of them.
        """
        TranscriptomeIndexSurveyor(self.survey_job).survey(
            source_type="TRANSCRIPTOME_INDEX")

        created_jobs = DownloaderJob.objects.order_by("id").all()
        self.assertGreater(created_jobs.count(), 50)

        # One expected call per downloader job, in id order.
        expected_calls = [call(Downloaders.TRANSCRIPTOME_INDEX, created_job)
                          for created_job in created_jobs]
        mock_send_job.assert_has_calls(expected_calls)
Exemplo n.º 6
0
    def test_survey(self, mock_get, mock_urlopen, mock_send_job):
        """ Surveys against a canned species list with mocked requests.

        Loads test_transcriptome_species.json from this test's
        directory, serves it through mock_get, and expects one
        DownloaderJob per species plus two Batches per species, each
        Batch holding two files.
        """
        fixture_path = os.path.join(os.path.dirname(__file__),
                                    "test_transcriptome_species.json")
        with open(fixture_path, "r") as fixture:
            species_json = json.load(fixture)

        # Insert the organisms into the database so the model doesn't call the
        # taxonomy API to populate them.
        for entry in species_json:
            # The main Ensembl division's API and the API for the other
            # divisions name these fields slightly differently, so pick
            # whichever key is present.
            name_key = "name"
            if "common_name" in entry:
                name_key = "common_name"

            taxonomy_key = "taxon_id"
            if "taxonomy_id" in entry:
                taxonomy_key = "taxonomy_id"

            Organism(name=entry[name_key].upper(),
                     taxonomy_id=entry[taxonomy_key],
                     is_scientific_name=True).save()

        api_response = Mock(ok=True)
        api_response.json.return_value = species_json
        mock_get.return_value = api_response

        # There are two possible file locations. The correct one is
        # determined by making a request to one to see if it
        # exists. This URLError simulates it not existing.
        mock_urlopen.side_effect = URLError("404 or something")

        TranscriptomeIndexSurveyor(self.survey_job).survey()

        created_jobs = DownloaderJob.objects.order_by("id").all()
        self.assertEqual(created_jobs.count(), len(species_json))

        # One expected call per downloader job id, in id order.
        expected_calls = [call(Downloaders.TRANSCRIPTOME_INDEX, created_job.id)
                          for created_job in created_jobs]
        mock_send_job.assert_has_calls(expected_calls)

        # There should be 2 Batches for each species (long and short
        # transcriptome lengths).
        all_batches = Batch.objects.all()
        self.assertEqual(all_batches.count(), len(species_json) * 2)

        # And each batch has two files: fasta and gtf
        for created_batch in all_batches:
            self.assertEqual(len(created_batch.files), 2)
    def test_survey_fungi_none(self, mock_send_job):
        """When surveying fungi an organism_name must be supplied.

        Only the division is set here, so no DownloaderJobs should be
        created and mock_send_job should never be called.
        """
        job = SurveyJob(source_type="TRANSCRIPTOME_INDEX")
        job.save()

        # Deliberately no "organism_name" key/value pair.
        SurveyJobKeyValue(survey_job=job,
                          key="ensembl_division",
                          value="EnsemblFungi").save()

        TranscriptomeIndexSurveyor(job).survey(source_type="TRANSCRIPTOME_INDEX")

        created_jobs = DownloaderJob.objects.order_by("id").all()
        self.assertEqual(created_jobs.count(), 0)

        mock_send_job.assert_not_called()
Exemplo n.º 8
0
    def test_survey(self, mock_send_job):
        """Surveys the EnsemblPlants division without an organism name.

        Expects more than 50 DownloaderJobs to exist afterwards and
        mock_send_job to have been called once for each of them.
        """
        job = SurveyJob(source_type="TRANSCRIPTOME_INDEX")
        job.save()

        SurveyJobKeyValue(
            survey_job=job, key="ensembl_division", value="EnsemblPlants"
        ).save()

        TranscriptomeIndexSurveyor(job).survey(source_type="TRANSCRIPTOME_INDEX")

        created_jobs = DownloaderJob.objects.order_by("id").all()
        self.assertGreater(created_jobs.count(), 50)

        # One expected call per downloader job, in id order.
        expected_calls = [
            call(Downloaders.TRANSCRIPTOME_INDEX, created_job)
            for created_job in created_jobs
        ]
        mock_send_job.assert_has_calls(expected_calls)
Exemplo n.º 9
0
def _get_surveyor_for_source(survey_job: SurveyJob):
    """Factory method for ExternalSourceSurveyors.

    Picks the surveyor class from survey_job.source_type and returns
    an instance of it constructed with survey_job.

    Raises SourceNotSupportedError when source_type is not one of
    "ARRAY_EXPRESS", "SRA" or "TRANSCRIPTOME_INDEX".
    """
    source_type = survey_job.source_type

    if source_type == "ARRAY_EXPRESS":
        return ArrayExpressSurveyor(survey_job)
    if source_type == "SRA":
        return SraSurveyor(survey_job)
    if source_type == "TRANSCRIPTOME_INDEX":
        return TranscriptomeIndexSurveyor(survey_job)

    # str() keeps a non-string source_type (e.g. None) from turning
    # this into a TypeError while the message is being built.
    raise SourceNotSupportedError("Source " + str(source_type) +
                                  " is not supported.")
    def test_correct_index_location_protist(self):
        """ Tests that the files returned actually exist.

        Tests the Protists division instead of the main division,
        using Leishmania major as the organism.
        """
        survey_job = SurveyJob(source_type="TRANSCRIPTOME_INDEX")
        survey_job.save()

        # Attach the division and the organism name to the survey job
        # as key/value pairs before building the surveyor.
        key_value_pair = SurveyJobKeyValue(survey_job=survey_job,
                                           key="ensembl_division",
                                           value="EnsemblProtists")
        key_value_pair.save()

        key_value_pair = SurveyJobKeyValue(survey_job=survey_job,
                                           key="organism_name",
                                           value="Leishmania major")
        key_value_pair.save()

        surveyor = TranscriptomeIndexSurveyor(survey_job)
        files = surveyor.discover_species()[0]

        # urlopen raises if a URL cannot be opened, which fails the
        # test. The context manager closes each response right away so
        # the test doesn't leak open connections.
        for file in files:
            with urllib.request.urlopen(file.source_url):
                pass
    def test_correct_index_location(self):
        """ Tests that the files returned actually exist.

        Uses an organism in the main division (Danio rerio). The
        survey job is also stored on the test case as self.survey_job.
        """
        survey_job = SurveyJob(source_type="TRANSCRIPTOME_INDEX")
        survey_job.save()
        self.survey_job = survey_job

        # Attach the division and the organism name to the survey job
        # as key/value pairs before building the surveyor.
        key_value_pair = SurveyJobKeyValue(survey_job=survey_job,
                                           key="ensembl_division",
                                           value="Ensembl")
        key_value_pair.save()

        key_value_pair = SurveyJobKeyValue(survey_job=survey_job,
                                           key="organism_name",
                                           value="Danio rerio")
        key_value_pair.save()

        surveyor = TranscriptomeIndexSurveyor(self.survey_job)
        files = surveyor.discover_species()[0]

        # urlopen raises if a URL cannot be opened, which fails the
        # test. The context manager closes each response right away so
        # the test doesn't leak open connections.
        for file in files:
            with urllib.request.urlopen(file.source_url):
                pass