def handle(self, *args, **options):
        if options["job_id"] is None:
            logger.error("You must specify a job ID.")
            sys.exit(1)

        try:
            job_type = Downloaders[options["job_name"]]
        except KeyError:
            logger.error("You must specify a valid job name.")
            sys.exit(1)

        if job_type is Downloaders.ARRAY_EXPRESS:
            download_array_express(options["job_id"])
        elif job_type is Downloaders.TRANSCRIPTOME_INDEX:
            download_transcriptome(options["job_id"])
        elif job_type is Downloaders.SRA:
            download_sra(options["job_id"])
        elif job_type is Downloaders.GEO:
            download_geo(options["job_id"])
        else:
            logger.error(
                ("A valid job name was specified for job %s with id %d but "
                 "no downloader function is known to run it."),
                options["job_name"],
                options["job_id"],
            )
            sys.exit(1)

        sys.exit(0)
示例#2
0
    def test_no_rnaseq(self):
        """Makes sure that no RNA-Seq data gets downloaded even if there's a job for it.
        """
        dlj = DownloaderJob()
        dlj.accession_code = 'GSE103217'
        dlj.save()

        original_file = OriginalFile()
        original_file.filename = "GSE103217_family.xml.tgz"
        original_file.source_url = "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE103nnn/GSE103217/miniml/GSE103217_family.xml.tgz"
        original_file.source_filename = "GSE103217_family.xml.tgz"
        original_file.save()

        assoc = DownloaderJobOriginalFileAssociation()
        assoc.original_file = original_file
        assoc.downloader_job = dlj
        assoc.save()

        sample = Sample()
        sample.accession_code = 'GSE103217'
        sample.technology = "RNA-SEQ"
        sample.manufacturer = "ILLUMINA"
        sample.platform_accession_code = "Illumina HiSeq 2500"
        sample.save()

        og_assoc = OriginalFileSampleAssociation()
        og_assoc.sample = sample
        og_assoc.original_file = original_file
        og_assoc.save()

        download_result = geo.download_geo(dlj.id)

        self.assertFalse(download_result)
        dlj.refresh_from_db()

        self.assertFalse(dlj.success)

        # It's not necessarily that we didn't extract any files, but
        # none that were usable so it looks like none.
        self.assertEqual(dlj.failure_reason,
                         "Failed to extract any downloaded files.")
示例#3
0
    def test_download_geo(self, mock_send_task):
        """ Tests the main 'download_geo' function. """

        dlj = DownloaderJob()
        dlj.accession_code = 'GSE22427'
        dlj.save()

        original_file = OriginalFile()
        original_file.filename = "GSE22427_non-normalized.txt.gz"
        original_file.source_url = "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE22nnn/GSE22427/suppl/GSE22427_non-normalized.txt.gz"
        original_file.source_filename = "GSE22427_non-normalized.txt.gz"
        original_file.save()

        assoc = DownloaderJobOriginalFileAssociation()
        assoc.original_file = original_file
        assoc.downloader_job = dlj
        assoc.save()

        sample = Sample()
        sample.accession_code = 'GSE22427'
        sample.technology = "MICROARRAY"
        sample.manufacturer = "AGILENT"
        sample.has_raw = True
        # This is fake, but we don't currently support any agilent
        # platforms so we're using a platform that is supported.
        sample.platform_accession_code = "Illumina_RatRef-12_V1.0"
        sample.save()

        sample_annotation = SampleAnnotation()
        sample_annotation.sample = sample
        sample_annotation.data = {
            'label_protocol_ch1': 'Agilent',
            'label_protocol_ch2': 'Agilent'
        }
        sample_annotation.save()

        og_assoc = OriginalFileSampleAssociation()
        og_assoc.sample = sample
        og_assoc.original_file = original_file
        og_assoc.save()

        download_result = geo.download_geo(dlj.id)

        file_assocs = OriginalFileSampleAssociation.objects.filter(
            sample=sample)
        self.assertEqual(file_assocs.count(), 2)

        for file_assoc in file_assocs:
            original_file = file_assoc.original_file
            if original_file.filename.endswith(".gz"):
                # We delete the archive after we extract from it
                self.assertFalse(original_file.is_downloaded)
            else:
                self.assertTrue(original_file.is_downloaded)

        # Make sure it worked
        self.assertTrue(download_result)
        self.assertTrue(dlj.failure_reason is None)
        self.assertTrue(len(ProcessorJob.objects.all()) > 0)
        self.assertEqual(ProcessorJob.objects.all()[0].pipeline_applied,
                         "AGILENT_TWOCOLOR_TO_PCL")
        self.assertEqual(ProcessorJob.objects.all()[0].ram_amount, 2048)