Пример #1
0
    def handle(self, *args, **options):
        """Run the downloader selected by the ``job_name`` option.

        Reads ``job_id`` and ``job_name`` from ``options``. Exits with
        status 1 when the job ID is ``None``, when the job name is not a
        key of ``Downloaders``, or when no downloader function is mapped
        to that member. Exits with status 0 after the selected downloader
        function returns.
        """
        job_id = options["job_id"]
        if job_id is None:
            logger.error("You must specify a job ID.")
            sys.exit(1)

        # The option lookup stays inside the try block so a missing
        # "job_name" key is reported the same way as an unknown name.
        try:
            job_type = Downloaders[options["job_name"]]
        except KeyError:
            logger.error("You must specify a valid job name.")
            sys.exit(1)

        # Pick the downloader function first; it is invoked exactly once
        # further down.
        if job_type is Downloaders.ARRAY_EXPRESS:
            run_downloader = download_array_express
        elif job_type is Downloaders.TRANSCRIPTOME_INDEX:
            run_downloader = download_transcriptome
        elif job_type is Downloaders.SRA:
            run_downloader = download_sra
        elif job_type is Downloaders.GEO:
            run_downloader = download_geo
        else:
            run_downloader = None

        if run_downloader is None:
            logger.error(
                ("A valid job name was specified for job %s with id %d but "
                 "no downloader function is known to run it."),
                options["job_name"],
                job_id,
            )
            sys.exit(1)
        else:
            run_downloader(job_id)

        sys.exit(0)
Пример #2
0
    def test_multiple_batches(self, mock_download_file):
        """An SRA downloader job linked to two batches is recorded as failed."""
        # Stub the download so that a regression in this test cannot
        # start a slow, real file transfer before it fails.
        mock_download_file.return_value = True

        first_batch, _ = self.insert_objects()

        second_batch_fields = {
            "survey_job": self.survey_job,
            "source_type": "SRA",
            "pipeline_required": "SALMON",
            "platform_accession_code": "IlluminaHiSeq2000",
            "experiment_accession_code": "DRX001564",
            "experiment_title": "It doesn't really matter.",
            "organism_id": 9031,
            "organism_name": "GALLUS GALLUS",
            "release_date": "2013-07-19",
            "last_uploaded_date": "2017-09-11",
            "status": BatchStatuses.NEW.value,
        }
        second_batch = Batch(**second_batch_fields)
        second_batch.save()

        job = DownloaderJob.create_job_and_relationships(
            batches=[first_batch, second_batch], downloader_task="dummy")
        job.save()

        sra.download_sra(job.id)

        # Re-read the job from the database to see what the downloader
        # stored on it.
        reloaded_job = DownloaderJob.objects.get(id=job.id)
        expected_reason = ("More than one batch found for SRA downloader job. "
                           "There should only be one.")
        self.assertFalse(reloaded_job.success)
        self.assertEqual(reloaded_job.failure_reason, expected_reason)
Пример #3
0
    def test_upload_fails(self, mock_download_file, mock_upload_raw_file,
                          mock_getsize):
        """An exception raised by the raw-file upload fails the job.

        The download and size lookup are stubbed out; the upload mock is
        made to raise, and the job is then expected to carry
        ``success == False`` and the upload failure reason.
        """
        # We don't actually want to download anything and we're
        # testing this function separately anyway.
        mock_download_file.return_value = True

        mock_getsize.return_value = 1337

        def raise_exception(job_dir):
            raise Exception("We're testing that this fails.")

        mock_upload_raw_file.side_effect = raise_exception

        # Only the batch is needed here; the inserted files are unused.
        batch, _ = self.insert_objects()
        downloader_job = DownloaderJob.create_job_and_relationships(
            batches=[batch], downloader_task="dummy")
        downloader_job.save()

        sra.download_sra(downloader_job.id)

        downloader_job.refresh_from_db()
        self.assertFalse(downloader_job.success)
        # assertEqual replaces the deprecated assertEquals alias, which
        # was removed in Python 3.12.
        self.assertEqual(downloader_job.failure_reason,
                         "Exception caught while uploading file.")

        # The upload mock is expected to have been invoked exactly once.
        self.assertEqual(len(mock_upload_raw_file.mock_calls), 1)
Пример #4
0
    def test_zero_batches(self, mock_download_file):
        """A downloader job created with no batches is recorded as failed."""
        # Stub the download so that a regression in this test cannot
        # start a slow, real file transfer before it fails.
        mock_download_file.return_value = True

        job = DownloaderJob.create_job_and_relationships(
            batches=[], downloader_task="dummy")
        job.save()

        sra.download_sra(job.id)

        # Re-read the job from the database to see what the downloader
        # stored on it.
        reloaded_job = DownloaderJob.objects.get(id=job.id)
        self.assertFalse(reloaded_job.success)
        self.assertEqual(reloaded_job.failure_reason, "No batches found.")
Пример #5
0
 def test_download_file_ncbi(self):
     """Download SRR9117853.sra through ``sra.download_sra`` and check the file."""
     job = DownloaderJob()
     job.accession_code = "SRR9117853"
     job.save()

     original_file = OriginalFile()
     original_file.source_filename = "SRR9117853.sra"
     # NOTE(review): the user part of this URL looks like it was mangled
     # by e-mail obfuscation -- confirm against the real source address.
     original_file.source_url = "[email protected]:/sra/sra-instant/reads/ByRun/sra/SRR/SRR9117/SRR9117853/SRR9117853.sra"
     original_file.is_archive = True
     original_file.save()

     run_sample = Sample()
     run_sample.accession_code = "SRR9117853"
     run_sample.save()

     # Link the original file to its sample ...
     sample_link = OriginalFileSampleAssociation()
     sample_link.sample = run_sample
     sample_link.original_file = original_file
     sample_link.save()

     # ... and to the downloader job that should fetch it.
     job_link = DownloaderJobOriginalFileAssociation()
     job_link.downloader_job = job
     job_link.original_file = original_file
     job_link.save()

     succeeded, fetched_files = sra.download_sra(job.pk)
     utils.end_downloader_job(job, succeeded)

     self.assertTrue(succeeded)
     first_file = fetched_files[0]
     self.assertEqual(first_file.sha1, "e7ad484fe6f134ba7d1b2664e58cc15ae5a958cc")
     self.assertTrue(os.path.exists(first_file.absolute_file_path))
Пример #6
0
 def test_download_file_ncbi(self, mock_send_job):
     """Download DRR002116.sra through ``sra.download_sra`` and check the file."""
     # The job-sending hook is stubbed to return None.
     mock_send_job.return_value = None

     job = DownloaderJob()
     job.accession_code = "DRR002116"
     job.save()

     original_file = OriginalFile()
     original_file.source_filename = "DRR002116.sra"
     # NOTE(review): the user part of this URL looks like it was mangled
     # by e-mail obfuscation -- confirm against the real source address.
     original_file.source_url = "[email protected]:/sra/sra-instant/reads/ByRun/sra/DRR/DRR002/DRR002116/DRR002116.sra"
     original_file.is_archive = True
     original_file.save()

     run_sample = Sample()
     run_sample.accession_code = "DRR002116"
     run_sample.save()

     # Link the original file to its sample ...
     sample_link = OriginalFileSampleAssociation()
     sample_link.sample = run_sample
     sample_link.original_file = original_file
     sample_link.save()

     # ... and to the downloader job that should fetch it.
     job_link = DownloaderJobOriginalFileAssociation()
     job_link.downloader_job = job
     job_link.original_file = original_file
     job_link.save()

     succeeded, fetched_files = sra.download_sra(job.pk)
     utils.end_downloader_job(job, succeeded)

     self.assertTrue(succeeded)
     first_file = fetched_files[0]
     self.assertEqual(first_file.sha1, "d5374e7fe047d4f76b165c3f5148ab2df9d42cea")
     self.assertTrue(os.path.exists(first_file.absolute_file_path))
Пример #7
0
    def test_download_file(self, mock_send_job):
        """Run ``sra.download_sra`` for a job holding one ERR036000 FASTQ file."""
        # The job-sending hook is stubbed to return None.
        mock_send_job.return_value = None

        job = DownloaderJob()
        job.accession_code = "ERR036"
        job.save()

        original_file = OriginalFile()
        original_file.source_filename = "ERR036000.fastq.gz"
        original_file.source_url = "ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR036/ERR036000/ERR036000_1.fastq.gz"
        original_file.is_archive = True
        original_file.save()

        run_sample = Sample()
        run_sample.accession_code = "ERR036000"
        run_sample.save()

        # Link the original file to its sample ...
        sample_link = OriginalFileSampleAssociation()
        sample_link.sample = run_sample
        sample_link.original_file = original_file
        sample_link.save()

        # ... and to the downloader job that should fetch it.
        job_link = DownloaderJobOriginalFileAssociation()
        job_link.downloader_job = job
        job_link.original_file = original_file
        job_link.save()

        # NOTE(review): the outcome is captured but never asserted, so
        # this test only checks that the call does not raise.
        outcome = sra.download_sra(job.pk)
Пример #8
0
    def test_download_file(self):
        """Download the ERR036000 FASTQ file via ``sra.download_sra`` and check it."""
        job = DownloaderJob()
        job.accession_code = "ERR036"
        job.save()

        original_file = OriginalFile()
        original_file.source_filename = "ERR036000.fastq.gz"
        original_file.source_url = "ftp.sra.ebi.ac.uk/vol1/fastq/ERR036/ERR036000/ERR036000_1.fastq.gz"
        original_file.is_archive = True
        original_file.save()

        run_sample = Sample()
        run_sample.accession_code = "ERR036000"
        run_sample.save()

        # Link the original file to its sample ...
        sample_link = OriginalFileSampleAssociation()
        sample_link.sample = run_sample
        sample_link.original_file = original_file
        sample_link.save()

        # ... and to the downloader job that should fetch it.
        job_link = DownloaderJobOriginalFileAssociation()
        job_link.downloader_job = job
        job_link.original_file = original_file
        job_link.save()

        succeeded, fetched_files = sra.download_sra(job.pk)

        self.assertTrue(succeeded)
        first_file = fetched_files[0]
        self.assertEqual(first_file.sha1,
                         "1dfe5460a4101fe87feeffec0cb2e053f6695961")
        self.assertTrue(os.path.exists(first_file.absolute_file_path))
Пример #9
0
    def test_happy_path(self, mock_download_file, mock_send_job,
                        mock_upload_raw_file, mock_getsize):
        """A single-batch SRA job succeeds and queues one SALMON processor job.

        The download, upload, size lookup and job dispatch are all
        mocked. The test checks the job's success flag, the recorded
        file sizes, the target paths passed to the download mock, the
        dispatched processor job, and the number of upload calls.
        """
        mock_send_job.return_value = None

        # We don't actually want to download anything and we're
        # testing this function separately anyway.
        mock_download_file.return_value = True

        mock_getsize.return_value = 1337

        batch, files = self.insert_objects()
        downloader_job = DownloaderJob.create_job_and_relationships(
            batches=[batch], downloader_task="dummy")
        downloader_job.save()

        sra.download_sra(downloader_job.id)

        downloader_job.refresh_from_db()
        self.assertTrue(downloader_job.success)
        # Each file should have picked up the size reported by the
        # mocked getsize. assertEqual replaces the deprecated
        # assertEquals alias, which was removed in Python 3.12.
        for batch_file in files:
            batch_file.refresh_from_db()
            self.assertEqual(batch_file.size_in_bytes, 1337)

        # .get() with no filters requires exactly one ProcessorJob row.
        processor_job = ProcessorJob.objects.get()

        target_path_template = "/home/user/data_store/temp/IlluminaHiSeq2000/SALMON/downloader_job_{}/DRR002116_{}.fastq.gz"  # noqa
        target_path_1 = target_path_template.format(downloader_job.id, 1)
        target_path_2 = target_path_template.format(downloader_job.id, 2)

        # Impossible to match the exact File and DownloaderJob
        # objects, so rather than trying to do so, just pull them out
        # from the calls and test the path it was called with:
        first_call = mock_download_file.call_args_list[0][0]
        second_call = mock_download_file.call_args_list[1][0]
        # The expected order is the "_2" path first, then the "_1" path.
        mock_download_file.assert_has_calls([
            call(first_call[0], first_call[1], target_path_2),
            call(second_call[0], second_call[1], target_path_1)
        ])

        mock_send_job.assert_called_once_with(ProcessorPipeline.SALMON,
                                              processor_job.id)

        self.assertEqual(len(mock_upload_raw_file.mock_calls), 2)
Пример #10
0
    def test_download_file_unmated_reads(self):
        """Download a two-file SRR1603661 job and check the first returned file.

        Two original files (``_1`` and ``_2`` FASTQ archives) are linked
        to one sample and one downloader job; each carries its own
        expected MD5 and expected size.
        """
        dlj = DownloaderJob()
        dlj.accession_code = "SRR1603661"
        dlj.save()

        og_1 = OriginalFile()
        og_1.source_filename = "SRR1603661_1.fastq.gz"
        og_1.source_url = "ftp.sra.ebi.ac.uk/vol1/fastq/SRR160/001/SRR1603661/SRR1603661_1.fastq.gz"
        og_1.expected_md5 = "502a9a482bfa5aa75865ccc0105ad13c"
        og_1.expected_size_in_bytes = 6751980628
        og_1.is_archive = True
        og_1.save()

        og_2 = OriginalFile()
        og_2.source_filename = "SRR1603661_2.fastq.gz"
        og_2.source_url = "ftp.sra.ebi.ac.uk/vol1/fastq/SRR160/001/SRR1603661/SRR1603661_2.fastq.gz"
        # These values belong to the second file; they were previously
        # assigned to og_1 by mistake, which overwrote og_1's expected
        # values and left og_2 without any.
        og_2.expected_md5 = "fffd24457418d255991f54ec82a39d57"
        og_2.expected_size_in_bytes = 6949912932
        og_2.is_archive = True
        og_2.save()

        sample = Sample()
        sample.accession_code = "SRR1603661"
        sample.save()

        # Link the first file to the sample and to the downloader job.
        assoc = OriginalFileSampleAssociation()
        assoc.sample = sample
        assoc.original_file = og_1
        assoc.save()
        assoc = DownloaderJobOriginalFileAssociation()
        assoc.downloader_job = dlj
        assoc.original_file = og_1
        assoc.save()

        # Link the second file to the same sample and downloader job.
        assoc = OriginalFileSampleAssociation()
        assoc.sample = sample
        assoc.original_file = og_2
        assoc.save()
        assoc = DownloaderJobOriginalFileAssociation()
        assoc.downloader_job = dlj
        assoc.original_file = og_2
        assoc.save()

        result, downloaded_files = sra.download_sra(dlj.pk)
        utils.end_downloader_job(dlj, result)

        self.assertTrue(result)
        self.assertEqual(downloaded_files[0].sha1,
                         "52bf22472069d04fa7767429f6ab78ebd10c0152")
        self.assertTrue(os.path.exists(downloaded_files[0].absolute_file_path))