def test_create_missing_jobs(self):
    """Check that downloader jobs are created only for files lacking one.

    Two original files are set up: one that is already associated with a
    downloader job and one that has none. After the command has run,
    each of the two files must be associated with exactly one
    downloader job.
    """

    def persisted(model_cls, **attrs):
        # Instantiate with no arguments, assign every attribute in the
        # order given, then save -- the same sequence as building the
        # record by hand.
        record = model_cls()
        for attr_name, attr_value in attrs.items():
            setattr(record, attr_name, attr_value)
        record.save()
        return record

    # Fixture 1: a sample whose original file already has a downloader job.
    file_with_job = persisted(
        OriginalFile,
        filename="processed.CEL",
        source_filename="processed.CEL",
        is_downloaded=True,
        is_archive=False,
    )
    sample_with_job = persisted(
        Sample,
        accession_code="MA_doesnt_need_processor",
        technology="MICROARRAY",
        source_database="GEO",
        platform_accession_code="bovine",
    )
    OriginalFileSampleAssociation.objects.get_or_create(
        sample=sample_with_job,
        original_file=file_with_job,
    )
    existing_job = persisted(
        DownloaderJob,
        success=True,
        worker_id="worker_1",
        volume_index="1",
    )
    DownloaderJobOriginalFileAssociation.objects.get_or_create(
        downloader_job=existing_job,
        original_file=file_with_job,
    )

    # Fixture 2: a sample whose original file has no downloader job yet.
    file_without_job = persisted(
        OriginalFile,
        filename="tarball.gz",
        source_filename="tarball.gz",
        is_downloaded=True,
        is_archive=True,
    )
    sample_without_job = persisted(
        Sample,
        accession_code="sample_no_downloader",
        technology="MICROARRAY",
        source_database="GEO",
        # The platform has to be a supported one.
        platform_accession_code="bovine",
    )
    OriginalFileSampleAssociation.objects.get_or_create(
        sample=sample_without_job,
        original_file=file_without_job,
    )

    # Fixtures are in place: run the management command.
    Command().handle()

    # The file that had no downloader job must have gained exactly one.
    created_count = DownloaderJobOriginalFileAssociation.objects.filter(
        original_file=file_without_job
    ).count()
    self.assertEqual(1, created_count)

    # The file that already had a downloader job must not have gained
    # another: the one made during setup is still the only one.
    preexisting_count = DownloaderJobOriginalFileAssociation.objects.filter(
        original_file=file_with_job
    ).count()
    self.assertEqual(1, preexisting_count)
def test_dharma(self):
    """Check when ``utils.start_job`` exits the process under forced harakiri.

    Three downloader jobs are created for this instance's worker id; the
    first two are given a start time, the third is not. ``start_job`` is
    then called twice for the third job with ``force_harakiri=True``:

    * with ``max_downloader_jobs_per_node=2`` it must raise ``SystemExit``;
    * with ``max_downloader_jobs_per_node=15`` it must not.

    NOTE(review): presumably the two started jobs are what put the node
    at its limit in the first call -- confirm against ``start_job``.
    """
    dlj1 = DownloaderJob()
    dlj1.accession_code = 'D1'
    dlj1.worker_id = get_instance_id()
    dlj1.start_time = datetime.datetime.now()
    dlj1.save()

    dlj2 = DownloaderJob()
    dlj2.accession_code = 'D2'
    dlj2.worker_id = get_instance_id()
    dlj2.start_time = datetime.datetime.now()
    dlj2.save()

    # The job under test: same worker, but never started.
    dlj3 = DownloaderJob()
    dlj3.accession_code = 'D3'
    dlj3.worker_id = get_instance_id()
    dlj3.save()

    original_file = OriginalFile()
    original_file.source_url = "ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MEXP/E-MEXP-433/E-MEXP-433.raw.1.zip"
    original_file.source_filename = "Waldhof_020604_R30_01-2753_U133A.CEL"
    original_file.save()

    assoc = DownloaderJobOriginalFileAssociation()
    assoc.original_file = original_file
    assoc.downloader_job = dlj3
    assoc.save()

    sample = Sample()
    sample.accession_code = 'Blahblahblah'
    sample.technology = "MICROARRAY"
    sample.manufacturer = "AFFYMETRIX"
    sample.has_raw = True
    sample.platform_accession_code = "hgu133a"
    sample.save()

    OriginalFileSampleAssociation.objects.get_or_create(
        sample=sample, original_file=original_file)

    # With a limit of 2 the call is expected to exit the process. Any
    # other outcome (no exception, or a different exception) makes the
    # test fail or error with the real cause attached.
    with self.assertRaises(SystemExit):
        utils.start_job(
            dlj3.id, max_downloader_jobs_per_node=2, force_harakiri=True)

    # With a limit of 15 the call must not exit the process.
    try:
        utils.start_job(
            dlj3.id, max_downloader_jobs_per_node=15, force_harakiri=True)
    except SystemExit:
        self.fail(
            "start_job exited with max_downloader_jobs_per_node=15; "
            "it should only exit when the per-node limit is reached.")
    except Exception:
        # Deliberately tolerated: this check is only about the process
        # not exiting, so any other exception is not a failure here.
        pass
def test_create_missing_jobs(self):
    """Tests that files which should have processor jobs get them created.

    Specifically files that fall into this category are files that had
    successful downloader jobs but for some reason do not have
    processor jobs. It's not yet known why this is happening, but
    part of this management command is logging about them to get a
    grasp of how many there are.

    We want this test to cover both Microarray and RNA-Seq. We also need
    to test both that files which need processor jobs have them
    created, but also that files which don't need them don't get them
    created.

    Therefore we need at least 4 original files:
      * Microarray needing processor job.
      * Microarray not needing processor job.
      * RNA-Seq needing processor job.
      * RNA-Seq not needing processor job.

    However Microarray can have files which shouldn't get processor
    jobs, so we're going to make one of those as well. Also Microarray
    jobs can download multiple files which get a processor job each,
    so we're going to make an additional Microarray file and
    associate it with the same downloader job so we can make sure two
    processor jobs are created based on that one downloader job.
    """
    # ------------------------------------------------------------------
    # Microarray File/Samples/Jobs
    # ------------------------------------------------------------------

    # A file that already has both a downloader job and a processor job.
    ma_og_doesnt_need_processor = OriginalFile()
    ma_og_doesnt_need_processor.filename = "processed.CEL"
    ma_og_doesnt_need_processor.is_downloaded = True
    ma_og_doesnt_need_processor.is_archive = False
    ma_og_doesnt_need_processor.save()

    ma_sample_doesnt_need_processor = Sample()
    ma_sample_doesnt_need_processor.accession_code = "MA_doesnt_need_processor"
    ma_sample_doesnt_need_processor.save()

    OriginalFileSampleAssociation.objects.get_or_create(
        sample=ma_sample_doesnt_need_processor,
        original_file=ma_og_doesnt_need_processor)

    ma_dl_job_doesnt_need_processor = DownloaderJob()
    ma_dl_job_doesnt_need_processor.success = True
    ma_dl_job_doesnt_need_processor.worker_id = "worker_1"
    ma_dl_job_doesnt_need_processor.volume_index = "1"
    ma_dl_job_doesnt_need_processor.save()

    DownloaderJobOriginalFileAssociation.objects.get_or_create(
        downloader_job=ma_dl_job_doesnt_need_processor,
        original_file=ma_og_doesnt_need_processor)

    ma_processor_job = ProcessorJob()
    ma_processor_job.success = True
    ma_processor_job.worker_id = "worker_1"
    # Set on the processor job being built (previously this line
    # re-assigned the already-saved downloader job by mistake).
    # NOTE(review): assumes ProcessorJob takes volume_index the same
    # way DownloaderJob does -- confirm against the model.
    ma_processor_job.volume_index = "1"
    ma_processor_job.save()

    ProcessorJobOriginalFileAssociation.objects.get_or_create(
        processor_job=ma_processor_job,
        original_file=ma_og_doesnt_need_processor)

    # Two files that share one downloader job and have no processor
    # job, plus the archive they are associated with.
    ma_og_needs_processor_1 = OriginalFile()
    ma_og_needs_processor_1.filename = "something.CEL"
    ma_og_needs_processor_1.is_downloaded = True
    ma_og_needs_processor_1.is_archive = False
    ma_og_needs_processor_1.save()

    ma_og_needs_processor_2 = OriginalFile()
    ma_og_needs_processor_2.filename = "something_else.CEL"
    ma_og_needs_processor_2.is_downloaded = True
    ma_og_needs_processor_2.is_archive = False
    ma_og_needs_processor_2.save()

    ma_og_archive = OriginalFile()
    ma_og_archive.filename = "tarball.gz"
    ma_og_archive.is_downloaded = True
    ma_og_archive.is_archive = True
    ma_og_archive.save()

    ma_sample_needs_processor_1 = Sample()
    ma_sample_needs_processor_1.accession_code = "MA_needs_processor_1"
    ma_sample_needs_processor_1.save()

    OriginalFileSampleAssociation.objects.get_or_create(
        sample=ma_sample_needs_processor_1,
        original_file=ma_og_needs_processor_1)
    OriginalFileSampleAssociation.objects.get_or_create(
        sample=ma_sample_needs_processor_1,
        original_file=ma_og_archive)

    ma_sample_needs_processor_2 = Sample()
    ma_sample_needs_processor_2.accession_code = "MA_needs_processor_2"
    ma_sample_needs_processor_2.save()

    OriginalFileSampleAssociation.objects.get_or_create(
        sample=ma_sample_needs_processor_2,
        original_file=ma_og_needs_processor_2)
    OriginalFileSampleAssociation.objects.get_or_create(
        sample=ma_sample_needs_processor_2,
        original_file=ma_og_archive)

    ma_dl_job_needs_processor = DownloaderJob()
    ma_dl_job_needs_processor.success = True
    ma_dl_job_needs_processor.worker_id = "worker_1"
    # Set on this job (previously assigned to the unrelated,
    # already-saved ma_dl_job_doesnt_need_processor).
    ma_dl_job_needs_processor.volume_index = "1"
    ma_dl_job_needs_processor.save()

    DownloaderJobOriginalFileAssociation.objects.get_or_create(
        downloader_job=ma_dl_job_needs_processor,
        original_file=ma_og_needs_processor_1)
    DownloaderJobOriginalFileAssociation.objects.get_or_create(
        downloader_job=ma_dl_job_needs_processor,
        original_file=ma_og_needs_processor_2)
    DownloaderJobOriginalFileAssociation.objects.get_or_create(
        downloader_job=ma_dl_job_needs_processor,
        original_file=ma_og_archive)

    # ------------------------------------------------------------------
    # RNA-Seq File/Samples/Jobs
    # ------------------------------------------------------------------

    # A file that already has a downloader job and a processor job.
    rna_og_doesnt_need_processor = OriginalFile()
    rna_og_doesnt_need_processor.filename = "processed.fastq"
    rna_og_doesnt_need_processor.is_downloaded = True
    rna_og_doesnt_need_processor.is_archive = False
    rna_og_doesnt_need_processor.save()

    rna_sample_doesnt_need_processor = Sample()
    rna_sample_doesnt_need_processor.accession_code = "RNA_doesnt_need_processor"
    rna_sample_doesnt_need_processor.save()

    OriginalFileSampleAssociation.objects.get_or_create(
        sample=rna_sample_doesnt_need_processor,
        original_file=rna_og_doesnt_need_processor)

    rna_dl_job_doesnt_need_processor = DownloaderJob()
    rna_dl_job_doesnt_need_processor.success = True
    rna_dl_job_doesnt_need_processor.worker_id = "worker_1"
    rna_dl_job_doesnt_need_processor.volume_index = "1"
    rna_dl_job_doesnt_need_processor.save()

    DownloaderJobOriginalFileAssociation.objects.get_or_create(
        downloader_job=rna_dl_job_doesnt_need_processor,
        original_file=rna_og_doesnt_need_processor)

    rna_processor_job = ProcessorJob()
    # Failed ProcessorJobs will be retried, so they still count.
    rna_processor_job.success = False
    rna_processor_job.worker_id = "worker_1"
    # Set on the processor job being built (previously this line
    # re-assigned the already-saved downloader job by mistake).
    rna_processor_job.volume_index = "1"
    rna_processor_job.save()

    ProcessorJobOriginalFileAssociation.objects.get_or_create(
        processor_job=rna_processor_job,
        original_file=rna_og_doesnt_need_processor)

    # A file with a successful downloader job but no processor job.
    rna_og_needs_processor = OriginalFile()
    rna_og_needs_processor.filename = "something.fastq"
    rna_og_needs_processor.is_downloaded = True
    rna_og_needs_processor.is_archive = False
    rna_og_needs_processor.save()

    rna_sample_needs_processor = Sample()
    rna_sample_needs_processor.accession_code = "RNA_needs_processor"
    rna_sample_needs_processor.save()

    OriginalFileSampleAssociation.objects.get_or_create(
        sample=rna_sample_needs_processor,
        original_file=rna_og_needs_processor)

    rna_dl_job_needs_processor = DownloaderJob()
    rna_dl_job_needs_processor.success = True
    rna_dl_job_needs_processor.worker_id = "worker_1"
    # Set on this job (previously assigned to the unrelated,
    # already-saved rna_dl_job_doesnt_need_processor).
    rna_dl_job_needs_processor.volume_index = "1"
    rna_dl_job_needs_processor.save()

    DownloaderJobOriginalFileAssociation.objects.get_or_create(
        downloader_job=rna_dl_job_needs_processor,
        original_file=rna_og_needs_processor)

    # Setup is done, actually run the command.
    command = Command()
    command.handle()

    # Test Microarray was handled correctly.
    ## Test that a missing processor job was created for each data file,
    ## and that none was created for the archive itself.
    self.assertEqual(
        1,
        ProcessorJobOriginalFileAssociation.objects.filter(
            original_file=ma_og_needs_processor_1).count())
    self.assertEqual(
        1,
        ProcessorJobOriginalFileAssociation.objects.filter(
            original_file=ma_og_needs_processor_2).count())
    self.assertEqual(
        0,
        ProcessorJobOriginalFileAssociation.objects.filter(
            original_file=ma_og_archive).count())

    ## Test that a processor job that wasn't missing wasn't created.
    ## Of course, we created one in test setup, so we're really
    ## checking that it's still only 1.
    self.assertEqual(
        1,
        ProcessorJobOriginalFileAssociation.objects.filter(
            original_file=ma_og_doesnt_need_processor).count())

    # Test RNA-Seq was handled correctly.
    ## Test that the missing processor job was created.
    self.assertEqual(
        1,
        ProcessorJobOriginalFileAssociation.objects.filter(
            original_file=rna_og_needs_processor).count())

    ## Test that a processor job that wasn't missing wasn't created.
    ## Of course, we created one in test setup, so we're really
    ## checking that it's still only 1.
    self.assertEqual(
        1,
        ProcessorJobOriginalFileAssociation.objects.filter(
            original_file=rna_og_doesnt_need_processor).count())