示例#1
0
    def test_create_missing_jobs(self):
        """Verify the command adds a downloader job only to files lacking one.

        Two samples are prepared, each tied to a single original file. The
        first file is already associated with a downloader job; the second
        is not. Once the command has run, each file must be associated with
        exactly one downloader job.
        """

        def persist(model_cls, **attrs):
            # Instantiate empty, assign each attribute in turn, then save.
            record = model_cls()
            for attr_name, attr_value in attrs.items():
                setattr(record, attr_name, attr_value)
            record.save()
            return record

        def downloader_link_count(og_file):
            # Number of downloader-job associations recorded for og_file.
            return DownloaderJobOriginalFileAssociation.objects.filter(
                original_file=og_file).count()

        # 1. A sample whose original file already has a downloader job.
        covered_file = persist(
            OriginalFile,
            filename="processed.CEL",
            source_filename="processed.CEL",
            is_downloaded=True,
            is_archive=False,
        )
        covered_sample = persist(
            Sample,
            accession_code="MA_doesnt_need_processor",
            technology="MICROARRAY",
            source_database="GEO",
            platform_accession_code="bovine",
        )
        OriginalFileSampleAssociation.objects.get_or_create(
            sample=covered_sample, original_file=covered_file)

        existing_job = persist(
            DownloaderJob,
            success=True,
            worker_id="worker_1",
            volume_index="1",
        )
        DownloaderJobOriginalFileAssociation.objects.get_or_create(
            downloader_job=existing_job, original_file=covered_file)

        # 2. A sample whose original file has no downloader job.
        uncovered_file = persist(
            OriginalFile,
            filename="tarball.gz",
            source_filename="tarball.gz",
            is_downloaded=True,
            is_archive=True,
        )
        uncovered_sample = persist(
            Sample,
            accession_code="sample_no_downloader",
            technology="MICROARRAY",
            source_database="GEO",
            platform_accession_code="bovine",  # must be a supported platform
        )
        OriginalFileSampleAssociation.objects.get_or_create(
            sample=uncovered_sample, original_file=uncovered_file)

        # 3. Fixtures are in place; run the management command.
        Command().handle()

        ## The file without a downloader job must have gained exactly one.
        self.assertEqual(1, downloader_link_count(uncovered_file))

        ## The file that already had a downloader job must not have gained
        ## another; the one created during setup is still the only one.
        self.assertEqual(1, downloader_link_count(covered_file))
    def test_dharma(self):
        """Tests when utils.start_job exits the process.

        Two downloader jobs with a start time and one without are saved,
        all with this instance's id as worker_id. The job without a start
        time is then passed to start_job twice with force_harakiri=True:

          * with max_downloader_jobs_per_node=2 a SystemExit is required;
          * with max_downloader_jobs_per_node=15 a SystemExit is forbidden.

        NOTE(review): presumably the exit is triggered because the two
        already-started jobs reach the limit of 2 -- confirm against
        utils.start_job.
        """
        # Two jobs that already carry a start time on this instance.
        for accession_code in ('D1', 'D2'):
            started_job = DownloaderJob()
            started_job.accession_code = accession_code
            started_job.worker_id = get_instance_id()
            started_job.start_time = datetime.datetime.now()
            started_job.save()

        # The job under test: same worker, but no start time yet.
        dlj3 = DownloaderJob()
        dlj3.accession_code = 'D3'
        dlj3.worker_id = get_instance_id()
        dlj3.save()

        original_file = OriginalFile()
        original_file.source_url = "ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MEXP/E-MEXP-433/E-MEXP-433.raw.1.zip"
        original_file.source_filename = "Waldhof_020604_R30_01-2753_U133A.CEL"
        original_file.save()

        assoc = DownloaderJobOriginalFileAssociation()
        assoc.original_file = original_file
        assoc.downloader_job = dlj3
        assoc.save()

        sample = Sample()
        sample.accession_code = 'Blahblahblah'
        sample.technology = "MICROARRAY"
        sample.manufacturer = "AFFYMETRIX"
        sample.has_raw = True
        sample.platform_accession_code = "hgu133a"
        sample.save()

        OriginalFileSampleAssociation.objects.get_or_create(
            sample=sample, original_file=original_file)

        # With a limit of 2 the call must exit. Any other exception
        # propagates and fails the test with its own traceback.
        with self.assertRaises(SystemExit):
            utils.start_job(dlj3.id,
                            max_downloader_jobs_per_node=2,
                            force_harakiri=True)

        # With a limit of 15 the call must not exit.
        try:
            utils.start_job(dlj3.id,
                            max_downloader_jobs_per_node=15,
                            force_harakiri=True)
        except SystemExit:
            self.fail("start_job exited although the per-node limit (15) "
                      "was not reached")
        except Exception:
            # Deliberately tolerated: this test has always accepted any
            # non-exit exception here; only the absence of SystemExit is
            # being checked.
            pass
示例#3
0
    def test_create_missing_jobs(self):
        """Tests that files which should have processor jobs get them created.

        Specifically files that fall into this category are files that
        had successful downloader jobs but for some reason do not have
        processor jobs. It's not yet known why this is happening, but
        part of this management command is logging about them to get a
        grasp of how many there are.

        We want this test to cover both Microarray and RNA-Seq. We
        also need to test both that files which need processor jobs
        have them created, but also that files which don't need them
        don't get them created.

        Therefore we need at least 4 original files:
          * Microarray needing processor job.
          * Microarray not needing processor job.
          * RNA-Seq needing processor job.
          * RNA-Seq not needing processor job.

        However Microarray can have files which shouldn't get
        processor jobs, so we're going to make one of those as
        well. Also Microarray jobs can download multiple files which
        get a processor job each, so we're going to make an additional
        Microarray file and associate it with the same downloader job
        so we can make sure two processor jobs are created based on
        that one downloader job.
        """
        # Microarray File/Samples/Jobs
        ma_og_doesnt_need_processor = OriginalFile()
        ma_og_doesnt_need_processor.filename = "processed.CEL"
        ma_og_doesnt_need_processor.is_downloaded = True
        ma_og_doesnt_need_processor.is_archive = False
        ma_og_doesnt_need_processor.save()

        ma_sample_doesnt_need_processor = Sample()
        ma_sample_doesnt_need_processor.accession_code = "MA_doesnt_need_processor"
        ma_sample_doesnt_need_processor.save()

        OriginalFileSampleAssociation.objects.get_or_create(
            sample=ma_sample_doesnt_need_processor,
            original_file=ma_og_doesnt_need_processor)

        ma_dl_job_doesnt_need_processor = DownloaderJob()
        ma_dl_job_doesnt_need_processor.success = True
        ma_dl_job_doesnt_need_processor.worker_id = "worker_1"
        ma_dl_job_doesnt_need_processor.volume_index = "1"
        ma_dl_job_doesnt_need_processor.save()

        DownloaderJobOriginalFileAssociation.objects.get_or_create(
            downloader_job=ma_dl_job_doesnt_need_processor,
            original_file=ma_og_doesnt_need_processor)

        # This file already has a processor job, so none should be added.
        ma_processor_job = ProcessorJob()
        ma_processor_job.success = True
        ma_processor_job.worker_id = "worker_1"
        # Set on the job being built, before it is saved.
        ma_processor_job.volume_index = "1"
        ma_processor_job.save()

        ProcessorJobOriginalFileAssociation.objects.get_or_create(
            processor_job=ma_processor_job,
            original_file=ma_og_doesnt_need_processor)

        ma_og_needs_processor_1 = OriginalFile()
        ma_og_needs_processor_1.filename = "something.CEL"
        ma_og_needs_processor_1.is_downloaded = True
        ma_og_needs_processor_1.is_archive = False
        ma_og_needs_processor_1.save()

        ma_og_needs_processor_2 = OriginalFile()
        ma_og_needs_processor_2.filename = "something_else.CEL"
        ma_og_needs_processor_2.is_downloaded = True
        ma_og_needs_processor_2.is_archive = False
        ma_og_needs_processor_2.save()

        # The archive is linked to both samples below and is asserted
        # to end up with no processor job of its own.
        ma_og_archive = OriginalFile()
        ma_og_archive.filename = "tarball.gz"
        ma_og_archive.is_downloaded = True
        ma_og_archive.is_archive = True
        ma_og_archive.save()

        ma_sample_needs_processor_1 = Sample()
        ma_sample_needs_processor_1.accession_code = "MA_needs_processor_1"
        ma_sample_needs_processor_1.save()

        OriginalFileSampleAssociation.objects.get_or_create(
            sample=ma_sample_needs_processor_1,
            original_file=ma_og_needs_processor_1)
        OriginalFileSampleAssociation.objects.get_or_create(
            sample=ma_sample_needs_processor_1, original_file=ma_og_archive)

        ma_sample_needs_processor_2 = Sample()
        ma_sample_needs_processor_2.accession_code = "MA_needs_processor_2"
        ma_sample_needs_processor_2.save()

        OriginalFileSampleAssociation.objects.get_or_create(
            sample=ma_sample_needs_processor_2,
            original_file=ma_og_needs_processor_2)
        OriginalFileSampleAssociation.objects.get_or_create(
            sample=ma_sample_needs_processor_2, original_file=ma_og_archive)

        # One downloader job covering both extracted files and the archive.
        ma_dl_job_needs_processor = DownloaderJob()
        ma_dl_job_needs_processor.success = True
        ma_dl_job_needs_processor.worker_id = "worker_1"
        # Set on the job being built, before it is saved.
        ma_dl_job_needs_processor.volume_index = "1"
        ma_dl_job_needs_processor.save()

        DownloaderJobOriginalFileAssociation.objects.get_or_create(
            downloader_job=ma_dl_job_needs_processor,
            original_file=ma_og_needs_processor_1)
        DownloaderJobOriginalFileAssociation.objects.get_or_create(
            downloader_job=ma_dl_job_needs_processor,
            original_file=ma_og_needs_processor_2)
        DownloaderJobOriginalFileAssociation.objects.get_or_create(
            downloader_job=ma_dl_job_needs_processor,
            original_file=ma_og_archive)

        # RNA-Seq File/Samples/Jobs
        rna_og_doesnt_need_processor = OriginalFile()
        rna_og_doesnt_need_processor.filename = "processed.fastq"
        rna_og_doesnt_need_processor.is_downloaded = True
        rna_og_doesnt_need_processor.is_archive = False
        rna_og_doesnt_need_processor.save()

        rna_sample_doesnt_need_processor = Sample()
        rna_sample_doesnt_need_processor.accession_code = "RNA_doesnt_need_processor"
        rna_sample_doesnt_need_processor.save()

        OriginalFileSampleAssociation.objects.get_or_create(
            sample=rna_sample_doesnt_need_processor,
            original_file=rna_og_doesnt_need_processor)

        rna_dl_job_doesnt_need_processor = DownloaderJob()
        rna_dl_job_doesnt_need_processor.success = True
        rna_dl_job_doesnt_need_processor.worker_id = "worker_1"
        rna_dl_job_doesnt_need_processor.volume_index = "1"
        rna_dl_job_doesnt_need_processor.save()

        DownloaderJobOriginalFileAssociation.objects.get_or_create(
            downloader_job=rna_dl_job_doesnt_need_processor,
            original_file=rna_og_doesnt_need_processor)

        rna_processor_job = ProcessorJob()
        # Failed ProcessorJobs will be retried, so they still count.
        rna_processor_job.success = False
        rna_processor_job.worker_id = "worker_1"
        # Set on the job being built, before it is saved.
        rna_processor_job.volume_index = "1"
        rna_processor_job.save()

        ProcessorJobOriginalFileAssociation.objects.get_or_create(
            processor_job=rna_processor_job,
            original_file=rna_og_doesnt_need_processor)

        rna_og_needs_processor = OriginalFile()
        rna_og_needs_processor.filename = "something.fastq"
        rna_og_needs_processor.is_downloaded = True
        rna_og_needs_processor.is_archive = False
        rna_og_needs_processor.save()

        rna_sample_needs_processor = Sample()
        rna_sample_needs_processor.accession_code = "RNA_needs_processor"
        rna_sample_needs_processor.save()

        OriginalFileSampleAssociation.objects.get_or_create(
            sample=rna_sample_needs_processor,
            original_file=rna_og_needs_processor)

        rna_dl_job_needs_processor = DownloaderJob()
        rna_dl_job_needs_processor.success = True
        rna_dl_job_needs_processor.worker_id = "worker_1"
        # Set on the job being built, before it is saved.
        rna_dl_job_needs_processor.volume_index = "1"
        rna_dl_job_needs_processor.save()

        DownloaderJobOriginalFileAssociation.objects.get_or_create(
            downloader_job=rna_dl_job_needs_processor,
            original_file=rna_og_needs_processor)

        # Setup is done, actually run the command.
        command = Command()
        command.handle()

        # Test Microarray was handled correctly.
        ## Test that a missing processor job was created.
        self.assertEqual(
            1,
            ProcessorJobOriginalFileAssociation.objects.filter(
                original_file=ma_og_needs_processor_1).count())
        self.assertEqual(
            1,
            ProcessorJobOriginalFileAssociation.objects.filter(
                original_file=ma_og_needs_processor_2).count())
        ## The archive itself must not get a processor job.
        self.assertEqual(
            0,
            ProcessorJobOriginalFileAssociation.objects.filter(
                original_file=ma_og_archive).count())

        ## Test that a processor job that wasn't missing wasn't created.
        ## Of course, we created one in test setup, so we're really
        ## checking that it's still only 1.
        self.assertEqual(
            1,
            ProcessorJobOriginalFileAssociation.objects.filter(
                original_file=ma_og_doesnt_need_processor).count())

        # Test RNA-Seq was handled correctly.
        ## Test that the missing processor job was created.
        self.assertEqual(
            1,
            ProcessorJobOriginalFileAssociation.objects.filter(
                original_file=rna_og_needs_processor).count())

        ## Test that a processor job that wasn't missing wasn't created.
        ## Of course, we created one in test setup, so we're really
        ## checking that it's still only 1.
        self.assertEqual(
            1,
            ProcessorJobOriginalFileAssociation.objects.filter(
                original_file=rna_og_doesnt_need_processor).count())