def test_download_file(self):
    """Download one ENA fastq through ``sra.download_sra`` and verify it.

    Checks that the call reports success, that the first downloaded file has
    the expected SHA-1, and that the file exists on disk.
    """
    job = DownloaderJob()
    job.accession_code = "ERR036"
    job.save()

    fastq = OriginalFile()
    fastq.source_filename = "ERR036000.fastq.gz"
    fastq.source_url = "ftp.sra.ebi.ac.uk/vol1/fastq/ERR036/ERR036000/ERR036000_1.fastq.gz"
    fastq.is_archive = True
    fastq.save()

    run_sample = Sample()
    run_sample.accession_code = "ERR036000"
    run_sample.save()

    # Tie the file to its sample, then to the job that will fetch it.
    sample_link = OriginalFileSampleAssociation()
    sample_link.sample = run_sample
    sample_link.original_file = fastq
    sample_link.save()

    job_link = DownloaderJobOriginalFileAssociation()
    job_link.downloader_job = job
    job_link.original_file = fastq
    job_link.save()

    succeeded, fetched = sra.download_sra(job.pk)

    self.assertTrue(succeeded)
    self.assertEqual(fetched[0].sha1, "1dfe5460a4101fe87feeffec0cb2e053f6695961")
    self.assertTrue(os.path.exists(fetched[0].absolute_file_path))
def test_download_file_ncbi(self):
    """Download an ``.sra`` run from NCBI through ``sra.download_sra``.

    The job is closed with ``utils.end_downloader_job``; the test then checks
    the success flag, the SHA-1 of the first downloaded file, and that the
    file exists on disk.
    """
    dlj = DownloaderJob()
    dlj.accession_code = "SRR9117853"
    dlj.save()

    og = OriginalFile()
    og.source_filename = "SRR9117853.sra"
    # NOTE(review): this literal used to read "[email protected]:/sra/...", which
    # is the placeholder left behind by e-mail obfuscation, not a usable
    # user@host. Restored to NCBI's anonymous Aspera login -- confirm against
    # the URL template used by the SRA surveyor.
    og.source_url = "anonftp@ftp.ncbi.nlm.nih.gov:/sra/sra-instant/reads/ByRun/sra/SRR/SRR9117/SRR9117853/SRR9117853.sra"
    og.is_archive = True
    og.save()

    sample = Sample()
    sample.accession_code = "SRR9117853"
    sample.save()

    assoc = OriginalFileSampleAssociation()
    assoc.sample = sample
    assoc.original_file = og
    assoc.save()

    assoc = DownloaderJobOriginalFileAssociation()
    assoc.downloader_job = dlj
    assoc.original_file = og
    assoc.save()

    result, downloaded_files = sra.download_sra(dlj.pk)
    utils.end_downloader_job(dlj, result)

    self.assertTrue(result)
    self.assertEqual(downloaded_files[0].sha1, "e7ad484fe6f134ba7d1b2664e58cc15ae5a958cc")
    self.assertTrue(os.path.exists(downloaded_files[0].absolute_file_path))
def test_download_file(self, mock_send_job):
    """Run ``sra.download_sra`` for one ENA fastq with job dispatch mocked.

    Only exercises the call; the return value is not asserted on.
    """
    mock_send_job.return_value = None

    job = DownloaderJob()
    job.accession_code = "ERR036"
    job.save()

    fastq = OriginalFile()
    fastq.source_filename = "ERR036000.fastq.gz"
    fastq.source_url = "ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR036/ERR036000/ERR036000_1.fastq.gz"
    fastq.is_archive = True
    fastq.save()

    run_sample = Sample()
    run_sample.accession_code = 'ERR036000'
    run_sample.save()

    sample_link = OriginalFileSampleAssociation()
    sample_link.sample = run_sample
    sample_link.original_file = fastq
    sample_link.save()

    job_link = DownloaderJobOriginalFileAssociation()
    job_link.downloader_job = job
    job_link.original_file = fastq
    job_link.save()

    # The outcome is captured but deliberately left unchecked, as before.
    outcome = sra.download_sra(job.pk)
def test_download_file_swapper(self, mock_send_job):
    """Call ``sra._download_file`` directly on an NCBI URL with ``force_ftp=False``.

    Job dispatch is mocked out; the test only asserts that the download
    helper returns a truthy result when writing to ``/tmp``.
    """
    mock_send_job.return_value = None

    dlj = DownloaderJob()
    dlj.accession_code = "DRR002116"
    dlj.save()

    og = OriginalFile()
    og.source_filename = "DRR002116.sra"
    # NOTE(review): this literal used to read "[email protected]:/sra/...", which
    # is the placeholder left behind by e-mail obfuscation, not a usable
    # user@host. Restored to NCBI's anonymous Aspera login -- confirm against
    # the URL template used by the SRA surveyor.
    og.source_url = "anonftp@ftp.ncbi.nlm.nih.gov:/sra/sra-instant/reads/ByRun/sra/DRR/DRR002/DRR002116/DRR002116.sra"
    og.is_archive = True
    og.save()

    sample = Sample()
    sample.accession_code = 'DRR002116'
    sample.save()

    assoc = OriginalFileSampleAssociation()
    assoc.sample = sample
    assoc.original_file = og
    assoc.save()

    assoc = DownloaderJobOriginalFileAssociation()
    assoc.downloader_job = dlj
    assoc.original_file = og
    assoc.save()

    result = sra._download_file(og.source_url, dlj, "/tmp", force_ftp=False)
    self.assertTrue(result)
def test_download_file_ncbi(self, mock_send_job):
    """Download an ``.sra`` run from NCBI through ``sra.download_sra``.

    Job dispatch is mocked out. After closing the job with
    ``utils.end_downloader_job`` the test checks the success flag, the SHA-1
    of the first downloaded file, and that the file exists on disk.
    """
    mock_send_job.return_value = None

    dlj = DownloaderJob()
    dlj.accession_code = "DRR002116"
    dlj.save()

    og = OriginalFile()
    og.source_filename = "DRR002116.sra"
    # NOTE(review): this literal used to read "[email protected]:/sra/...", which
    # is the placeholder left behind by e-mail obfuscation, not a usable
    # user@host. Restored to NCBI's anonymous Aspera login -- confirm against
    # the URL template used by the SRA surveyor.
    og.source_url = "anonftp@ftp.ncbi.nlm.nih.gov:/sra/sra-instant/reads/ByRun/sra/DRR/DRR002/DRR002116/DRR002116.sra"
    og.is_archive = True
    og.save()

    sample = Sample()
    sample.accession_code = 'DRR002116'
    sample.save()

    assoc = OriginalFileSampleAssociation()
    assoc.sample = sample
    assoc.original_file = og
    assoc.save()

    assoc = DownloaderJobOriginalFileAssociation()
    assoc.downloader_job = dlj
    assoc.original_file = og
    assoc.save()

    result, downloaded_files = sra.download_sra(dlj.pk)
    utils.end_downloader_job(dlj, result)

    self.assertTrue(result)
    self.assertEqual(downloaded_files[0].sha1, 'd5374e7fe047d4f76b165c3f5148ab2df9d42cea')
    self.assertTrue(os.path.exists(downloaded_files[0].absolute_file_path))
def test_download_file_unmated_reads(self):
    """Download both fastq files of a paired run through ``sra.download_sra``.

    Two original files (``_1`` and ``_2``) are attached to one sample and one
    downloader job. The test checks the success flag, the SHA-1 of the first
    downloaded file, and that the file exists on disk.
    """
    dlj = DownloaderJob()
    dlj.accession_code = "SRR1603661"
    dlj.save()

    og_1 = OriginalFile()
    og_1.source_filename = "SRR1603661_1.fastq.gz"
    og_1.source_url = "ftp.sra.ebi.ac.uk/vol1/fastq/SRR160/001/SRR1603661/SRR1603661_1.fastq.gz"
    og_1.expected_md5 = "502a9a482bfa5aa75865ccc0105ad13c"
    og_1.expected_size_in_bytes = 6751980628
    og_1.is_archive = True
    og_1.save()

    og_2 = OriginalFile()
    og_2.source_filename = "SRR1603661_2.fastq.gz"
    og_2.source_url = "ftp.sra.ebi.ac.uk/vol1/fastq/SRR160/001/SRR1603661/SRR1603661_2.fastq.gz"
    # These two values were previously assigned to og_1 (after it had already
    # been saved), so og_2 was stored without a checksum or size.
    og_2.expected_md5 = "fffd24457418d255991f54ec82a39d57"
    og_2.expected_size_in_bytes = 6949912932
    og_2.is_archive = True
    og_2.save()

    sample = Sample()
    sample.accession_code = "SRR1603661"
    sample.save()

    # Link each file to the sample and to the downloader job, first file first.
    assoc = OriginalFileSampleAssociation()
    assoc.sample = sample
    assoc.original_file = og_1
    assoc.save()

    assoc = DownloaderJobOriginalFileAssociation()
    assoc.downloader_job = dlj
    assoc.original_file = og_1
    assoc.save()

    assoc = OriginalFileSampleAssociation()
    assoc.sample = sample
    assoc.original_file = og_2
    assoc.save()

    assoc = DownloaderJobOriginalFileAssociation()
    assoc.downloader_job = dlj
    assoc.original_file = og_2
    assoc.save()

    result, downloaded_files = sra.download_sra(dlj.pk)
    utils.end_downloader_job(dlj, result)

    self.assertTrue(result)
    self.assertEqual(downloaded_files[0].sha1, "52bf22472069d04fa7767429f6ab78ebd10c0152")
    self.assertTrue(os.path.exists(downloaded_files[0].absolute_file_path))
def test_download_file(self, mock_send_job):
    """Run ``transcriptome_index.download_transcriptome`` for one GTF archive.

    Job dispatch is mocked out and the file URL comes from
    ``self.gtf_download_url``. The test makes no assertions of its own.
    """
    mock_send_job.return_value = None

    job = DownloaderJob()
    job.save()

    gtf_archive = OriginalFile()
    gtf_archive.source_filename = "Aegilops_tauschii.ASM34733v1.37.gtf.gz"
    gtf_archive.source_url = self.gtf_download_url
    gtf_archive.is_archive = True
    gtf_archive.save()

    job_link = DownloaderJobOriginalFileAssociation()
    job_link.downloader_job = job
    job_link.original_file = gtf_archive
    job_link.save()

    transcriptome_index.download_transcriptome(job.pk)
def test_download_file_swapper(self):
    """Call ``sra._download_file`` directly on an NCBI URL with ``force_ftp=False``.

    The test only asserts that the download helper returns a truthy result
    when asked to write to ``/tmp/doomed``.
    """
    dlj = DownloaderJob()
    dlj.accession_code = "SRR9117853"
    dlj.save()

    og = OriginalFile()
    og.source_filename = "SRR9117853.sra"
    # NOTE(review): this literal used to read "[email protected]:/sra/...", which
    # is the placeholder left behind by e-mail obfuscation, not a usable
    # user@host. Restored to NCBI's anonymous Aspera login -- confirm against
    # the URL template used by the SRA surveyor.
    og.source_url = "anonftp@ftp.ncbi.nlm.nih.gov:/sra/sra-instant/reads/ByRun/sra/SRR/SRR9117/SRR9117853/SRR9117853.sra"
    og.is_archive = True
    og.save()

    sample = Sample()
    sample.accession_code = "SRR9117853"
    sample.save()

    assoc = OriginalFileSampleAssociation()
    assoc.sample = sample
    assoc.original_file = og
    assoc.save()

    assoc = DownloaderJobOriginalFileAssociation()
    assoc.downloader_job = dlj
    assoc.original_file = og
    assoc.save()

    result = sra._download_file(og.source_url, dlj, "/tmp/doomed", force_ftp=False)
    self.assertTrue(result)
def test_no_rnaseq(self):
    """Check that ``geo.download_geo`` fails for a job whose sample is RNA-SEQ.

    The job must come back unsuccessful with the "Failed to extract any
    downloaded files." failure reason.
    """

    def persist(record, **attrs):
        """Set ``attrs`` on ``record`` in order, save it, and return it."""
        for attr_name, attr_value in attrs.items():
            setattr(record, attr_name, attr_value)
        record.save()
        return record

    job = persist(DownloaderJob(), accession_code='GSE103217')

    miniml_archive = persist(
        OriginalFile(),
        filename="GSE103217_family.xml.tgz",
        source_url="ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE103nnn/GSE103217/miniml/GSE103217_family.xml.tgz",
        source_filename="GSE103217_family.xml.tgz",
    )
    persist(
        DownloaderJobOriginalFileAssociation(),
        original_file=miniml_archive,
        downloader_job=job,
    )

    rnaseq_sample = persist(
        Sample(),
        accession_code='GSE103217',
        technology="RNA-SEQ",
        manufacturer="ILLUMINA",
        platform_accession_code="Illumina HiSeq 2500",
    )
    persist(
        OriginalFileSampleAssociation(),
        sample=rnaseq_sample,
        original_file=miniml_archive,
    )

    outcome = geo.download_geo(job.id)
    self.assertFalse(outcome)

    job.refresh_from_db()
    self.assertFalse(job.success)

    # Files may well have been extracted; none of them were usable, so the
    # job reports it as if nothing was extracted at all.
    self.assertEqual(job.failure_reason, "Failed to extract any downloaded files.")
def test_download_geo(self, mock_send_task):
    """Run ``geo.download_geo`` end to end on a GEO supplementary archive.

    After the download the sample must be linked to two original files: the
    ``.gz`` archive (no longer marked downloaded) and the extracted file
    (marked downloaded). The job must have no failure reason and a processor
    job for the ``AGILENT_TWOCOLOR_TO_PCL`` pipeline with 2048 of RAM must
    exist.
    """
    dlj = DownloaderJob()
    dlj.accession_code = 'GSE22427'
    dlj.save()

    original_file = OriginalFile()
    original_file.filename = "GSE22427_non-normalized.txt.gz"
    original_file.source_url = "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE22nnn/GSE22427/suppl/GSE22427_non-normalized.txt.gz"
    original_file.source_filename = "GSE22427_non-normalized.txt.gz"
    original_file.save()

    assoc = DownloaderJobOriginalFileAssociation()
    assoc.original_file = original_file
    assoc.downloader_job = dlj
    assoc.save()

    sample = Sample()
    sample.accession_code = 'GSE22427'
    sample.technology = "MICROARRAY"
    sample.manufacturer = "AGILENT"
    sample.has_raw = True
    # This is fake, but we don't currently support any agilent
    # platforms so we're using a platform that is supported.
    sample.platform_accession_code = "Illumina_RatRef-12_V1.0"
    sample.save()

    sample_annotation = SampleAnnotation()
    sample_annotation.sample = sample
    sample_annotation.data = {
        'label_protocol_ch1': 'Agilent',
        'label_protocol_ch2': 'Agilent',
    }
    sample_annotation.save()

    og_assoc = OriginalFileSampleAssociation()
    og_assoc.sample = sample
    og_assoc.original_file = original_file
    og_assoc.save()

    download_result = geo.download_geo(dlj.id)

    file_assocs = OriginalFileSampleAssociation.objects.filter(sample=sample)
    self.assertEqual(file_assocs.count(), 2)

    for file_assoc in file_assocs:
        original_file = file_assoc.original_file
        if original_file.filename.endswith(".gz"):
            # We delete the archive after we extract from it
            self.assertFalse(original_file.is_downloaded)
        else:
            self.assertTrue(original_file.is_downloaded)

    # Make sure it worked
    self.assertTrue(download_result)

    # download_geo only receives the job id, so the local `dlj` object never
    # sees what it wrote. Reload it, otherwise this check could never fail.
    dlj.refresh_from_db()
    self.assertIsNone(dlj.failure_reason)

    self.assertGreater(ProcessorJob.objects.count(), 0)
    first_processor_job = ProcessorJob.objects.all()[0]
    self.assertEqual(first_processor_job.pipeline_applied, "AGILENT_TWOCOLOR_TO_PCL")
    self.assertEqual(first_processor_job.ram_amount, 2048)
def test_download_multiple_zips(self, mock_send_job):
    """Check that two downloader jobs for one experiment yield two processor jobs.

    Regression test for
    https://github.com/AlexsLemonade/refinebio/pull/351, where every file
    extracted into a directory got a processor job queued each time a
    downloader job pointing at that directory ran.

    Two downloader jobs for the same experiment are run, each using a
    different zip file. With the bug present, the second job would have found
    the first sample's file in the directory and queued a second processor
    job for it.
    """
    # (zip URL, CEL file inside it, accession code of the matching sample)
    fixtures = (
        (
            "ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MEXP/E-MEXP-433/E-MEXP-433.raw.1.zip",
            "Waldhof_020604_R30_01-2753_U133A.CEL",
            'E-MEXP-433-Waldhof_020604_R30_01-2753_U133A',
        ),
        (
            "ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MEXP/E-MEXP-433/E-MEXP-433.raw.2.zip",
            "N08_U133A.CEL",
            'E-MEXP-433-N08_U133A',
        ),
    )

    jobs = []
    for zip_url, cel_name, sample_code in fixtures:
        job = DownloaderJob()
        job.accession_code = 'E-MEXP-433'
        job.save()
        jobs.append(job)

        cel_file = OriginalFile()
        cel_file.source_url = zip_url
        cel_file.source_filename = cel_name
        cel_file.save()

        job_link = DownloaderJobOriginalFileAssociation()
        job_link.original_file = cel_file
        job_link.downloader_job = job
        job_link.save()

        array_sample = Sample()
        array_sample.accession_code = sample_code
        array_sample.technology = "MICROARRAY"
        array_sample.manufacturer = "AFFYMETRIX"
        array_sample.has_raw = True
        array_sample.platform_accession_code = "hgu133a"
        array_sample.save()

        OriginalFileSampleAssociation.objects.get_or_create(
            sample=array_sample, original_file=cel_file
        )

    # Run the jobs only once both fixtures exist, in creation order.
    for job in jobs:
        array_express.download_array_express(job.id)

    self.assertEqual(ProcessorJob.objects.all().count(), 2)
def create_processor_job(pipeline="AFFY_TO_PCL", ram_amount=2048, start_time=None):
    """Create and save a ProcessorJob linked to two placeholder original files.

    For the "AFFY_TO_PCL" pipeline a DownloaderJob is also created, linked to
    both files, and set as the processor job's ``downloader_job``. For any
    other pipeline ``downloader_job`` is left as ``None``.

    Returns the saved ProcessorJob.
    """

    def placeholder_file():
        # The attribute values are arbitrary; only the rows matter.
        placeholder = OriginalFile()
        placeholder.source_filename = "doesn't matter"
        placeholder.filename = "this either"
        placeholder.absolute_file_path = "nor this"
        placeholder.save()
        return placeholder

    first_file = placeholder_file()
    second_file = placeholder_file()

    parent_job = None
    if pipeline == "AFFY_TO_PCL":
        parent_job = DownloaderJob(
            downloader_task="SRA",
            batch_job_id="DEFAULT",
            num_retries=0,
            accession_code="NUNYA",
            success=None,
        )
        parent_job.save()

        # Same order as before: second file first.
        for linked_file in (second_file, first_file):
            download_link = DownloaderJobOriginalFileAssociation()
            download_link.original_file = linked_file
            download_link.downloader_job = parent_job
            download_link.save()

    new_job = ProcessorJob(
        downloader_job=parent_job,
        pipeline_applied=pipeline,
        batch_job_id="PROCESSOR/dispatch-1528945054-e8eaf540",
        ram_amount=ram_amount,
        num_retries=0,
        success=None,
        start_time=start_time,
    )
    new_job.save()

    for linked_file in (first_file, second_file):
        processing_link = ProcessorJobOriginalFileAssociation()
        processing_link.original_file = linked_file
        processing_link.processor_job = new_job
        processing_link.save()

    return new_job
def test_no_repeat_jobs(self):
    """Check that no second DownloaderJob is queued for files that have one.

    One experiment, one sample and two original files are created, and both
    files are attached to an existing DownloaderJob. After calling
    ``queue_downloader_job_for_original_files`` there must still be exactly
    one DownloaderJob.
    """
    experiment = Experiment()
    experiment.accession_code = "Experiment1"
    experiment.save()

    run_sample = Sample()
    run_sample.accession_code = "Sample1"
    run_sample.platform_accession_code = "Illumina Genome Analyzer"
    run_sample.platform_accession_name = "Illumina Genome Analyzer"
    run_sample.technology = "RNA-SEQ"
    run_sample.manufacturer = "ILLUMINA"
    run_sample.source_database = "SRA"
    run_sample.save()

    # Each original file is saved and immediately linked to the sample.
    pending_files = []
    for url, name in (("first_url", "first_filename"), ("second_url", "second_filename")):
        pending = OriginalFile()
        pending.source_url = url
        pending.source_filename = name
        pending.is_downloaded = False
        pending.has_raw = True
        pending.save()
        pending_files.append(pending)

        sample_link = OriginalFileSampleAssociation()
        sample_link.original_file = pending
        sample_link.sample = run_sample
        sample_link.save()

    existing_job = DownloaderJob()
    existing_job.save()
    for pending in pending_files:
        DownloaderJobOriginalFileAssociation(
            downloader_job=existing_job, original_file=pending
        ).save()

    survey_job = SurveyJob(source_type="SRA")
    survey_job.save()

    sra_surveyor = SraSurveyor(survey_job)
    sra_surveyor.queue_downloader_job_for_original_files(
        pending_files, experiment.accession_code
    )

    # This test made one DownloaderJob itself, so the surveyor had nothing
    # left to queue and the count must be unchanged.
    self.assertEqual(1, DownloaderJob.objects.all().count())
def setUpClass(cls):
    """Set up class.

    Creates two experiments, two samples, an organism with a QN target, and
    the job/file/result rows linking them, then clears the cache and
    rebuilds the search index.
    """
    # ref https://stackoverflow.com/a/29655301/763705
    super(ESTestCases, cls).setUpClass()

    rnaseq_experiment = Experiment()
    rnaseq_experiment.accession_code = "GSE000-X"
    rnaseq_experiment.title = "NONONONO"
    rnaseq_experiment.description = "Boooooourns. Wasabi."
    rnaseq_experiment.technology = "RNA-SEQ"
    rnaseq_experiment.save()

    array_experiment = Experiment()
    array_experiment.accession_code = "GSE123-X"
    array_experiment.title = "Hey Ho Let's Go"
    array_experiment.description = (
        "This is a very exciting test experiment. Faygo soda. Blah blah blah."
    )
    array_experiment.technology = "MICROARRAY"
    # The single sample counted here is attached further down.
    array_experiment.num_processed_samples = 1
    array_experiment.num_total_samples = 1
    array_experiment.num_downloadable_samples = 1
    array_experiment.save()

    annotation = ExperimentAnnotation()
    annotation.data = {"hello": "world", "123": 456}
    annotation.experiment = array_experiment
    annotation.save()

    bare_sample = Sample()
    bare_sample.title = "123"
    bare_sample.accession_code = "123"
    bare_sample.save()

    panda = Organism(
        name="AILUROPODA_MELANOLEUCA", taxonomy_id=9646, is_scientific_name=True
    )
    panda.save()

    processed_sample = Sample()
    processed_sample.title = "789"
    processed_sample.accession_code = "789"
    processed_sample.is_processed = True
    processed_sample.organism = panda
    processed_sample.save()

    processed_annotation = SampleAnnotation()
    processed_annotation.data = {"goodbye": "world", "789": 123}
    processed_annotation.sample = processed_sample
    processed_annotation.save()

    blank_file = OriginalFile()
    blank_file.save()

    file_sample_link = OriginalFileSampleAssociation()
    file_sample_link.sample = processed_sample
    file_sample_link.original_file = blank_file
    file_sample_link.save()

    download_job = DownloaderJob()
    download_job.save()

    download_link = DownloaderJobOriginalFileAssociation()
    download_link.original_file = blank_file
    download_link.downloader_job = download_job
    download_link.save()

    processing_job = ProcessorJob()
    processing_job.save()

    processing_link = ProcessorJobOriginalFileAssociation()
    processing_link.original_file = blank_file
    processing_link.processor_job = processing_job
    processing_link.save()

    # Attach the processed sample to the microarray experiment.
    experiment_link = ExperimentSampleAssociation()
    experiment_link.sample = processed_sample
    experiment_link.experiment = array_experiment
    experiment_link.save()

    qn_result = ComputationalResult()
    qn_result.save()

    # Mark that result as a QN target for the sample's organism ...
    qn_annotation = ComputationalResultAnnotation()
    qn_annotation.result = qn_result
    qn_annotation.data = {"is_qn": True, "organism_id": processed_sample.organism.id}
    qn_annotation.save()

    # ... and register it on the organism itself.
    processed_sample.organism.qn_target = qn_result
    processed_sample.organism.save()

    first_result_link = SampleResultAssociation()
    first_result_link.sample = processed_sample
    first_result_link.result = qn_result
    first_result_link.save()

    extra_result = ComputationalResult()
    extra_result.save()

    second_result_link = SampleResultAssociation()
    second_result_link.sample = processed_sample
    second_result_link.result = extra_result
    second_result_link.save()

    # Clear the default cache and reindex; otherwise the organisms with
    # qn_targets will be cached.
    cache.clear()
    call_command("search_index", "--rebuild", "-f")
def create_downloader_job(self):
    """Create and save an SRA DownloaderJob linked to two placeholder files.

    Returns the saved job.
    """
    new_job = DownloaderJob(
        downloader_task="SRA",
        nomad_job_id="DOWNLOADER/dispatch-1528945054-e8eaf540",
        num_retries=0,
        accession_code="NUNYA",
        success=None,
    )
    new_job.save()

    # Two identical placeholder files, each linked right after it is saved.
    for _ in range(2):
        placeholder = OriginalFile()
        placeholder.source_filename = "doesn't matter"
        placeholder.filename = "this either"
        placeholder.absolute_file_path = "nor this"
        placeholder.save()

        job_link = DownloaderJobOriginalFileAssociation()
        job_link.original_file = placeholder
        job_link.downloader_job = new_job
        job_link.save()

    return new_job
def test_download_aspera_and_ftp(self):
    """Exercise ``geo._download_file`` over Aspera and FTP, plus an Aspera failure.

    The same GEO supplementary file is fetched twice, once with
    ``force_ftp=False`` and once with ``force_ftp=True``; each time the file
    must exist afterwards and is then removed. Finally
    ``geo._download_file_aspera`` is called with a URL and attempt number
    that are expected to fail, which must set a failure reason on the job.
    """
    dlj = DownloaderJob()
    dlj.accession_code = 'GSE22427'
    dlj.save()

    original_file = OriginalFile()
    original_file.source_url = "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE22nnn/GSE22427/suppl/GSE22427_non-normalized.txt.gz"
    original_file.source_filename = "GSE22427_non-normalized.txt.gz"
    original_file.save()

    assoc = DownloaderJobOriginalFileAssociation()
    assoc.original_file = original_file
    assoc.downloader_job = dlj
    assoc.save()

    sample = Sample()
    sample.accession_code = 'GSE22427'
    sample.save()

    sample_annotation = SampleAnnotation()
    sample_annotation.sample = sample
    sample_annotation.data = {
        'label_protocol_ch1': 'Agilent',
        'label_protocol_ch2': 'Agilent',
    }
    sample_annotation.save()

    og_assoc = OriginalFileSampleAssociation()
    og_assoc.sample = sample
    og_assoc.original_file = original_file
    og_assoc.save()

    local_root_dir = "/home/user/data_store"
    sample_dir = os.path.join(local_root_dir, sample.accession_code)
    os.makedirs(sample_dir, exist_ok=True)
    # The target file keeps the last path component of the source URL.
    dl_file_path = os.path.join(sample_dir, original_file.source_url.split('/')[-1])

    # Aspera
    result = geo._download_file(
        original_file.source_url, file_path=dl_file_path, job=dlj, force_ftp=False
    )
    self.assertTrue(result)
    self.assertTrue(os.path.exists(dl_file_path))
    os.remove(dl_file_path)

    # FTP
    result = geo._download_file(
        original_file.source_url, file_path=dl_file_path, job=dlj, force_ftp=True
    )
    self.assertTrue(result)
    self.assertTrue(os.path.exists(dl_file_path))
    os.remove(dl_file_path)

    # Aspera, fail
    result = geo._download_file_aspera(
        "https://rich.zone/cool_horse.jpg",
        target_file_path=dl_file_path,
        downloader_job=dlj,
        attempt=5,
    )
    self.assertFalse(result)
    # The job object itself was handed to the helper, so no reload is needed.
    self.assertIsNotNone(dlj.failure_reason)
def test_organism_shepherd_command(self, mock_nomad, mock_send_job, mock_get_active_volumes):
    """Check the order in which the organism shepherd requeues failed jobs.

    Fixture:

    * Two RNA-SEQ experiments for the same organism.
    * ERP036000 has two samples: one with a successful processor job and one
      with a failed processor job (1/2 processed).
    * ERP037000 has one sample whose downloader job failed (0/1 processed).

    Expectations:

    * Both failed jobs are requeued.
    * The processor job is sent first, because its experiment has the higher
      completion percentage; the downloader job is sent second.
    """

    def stored(record, **attrs):
        """Set ``attrs`` on ``record`` in order, save it, and return it."""
        for attr_name, attr_value in attrs.items():
            setattr(record, attr_name, attr_value)
        record.save()
        return record

    def fake_nomad_client(host, port=0, timeout=0):
        # Stand-in client whose job listing is always empty.
        client = MagicMock()
        client.jobs = MagicMock()
        client.jobs.get_jobs = MagicMock()
        client.jobs.get_jobs.side_effect = lambda: []
        return client

    # Mocks first, so nothing below touches the network.
    mock_send_job.return_value = True
    active_volumes = {"1", "2", "3"}
    mock_get_active_volumes.return_value = active_volumes
    mock_nomad.side_effect = fake_nomad_client

    zebrafish = Organism(name="DANIO_RERIO", taxonomy_id=1337, is_scientific_name=True)
    zebrafish.save()

    # --- Experiment that is 0% complete -------------------------------
    empty_experiment = stored(Experiment(accession_code='ERP037000'), technology='RNA-SEQ')
    ExperimentOrganismAssociation.objects.create(
        organism=zebrafish, experiment=empty_experiment
    )

    undownloaded_file = stored(
        OriginalFile(),
        filename="ERR037001.fastq.gz",
        source_filename="ERR037001.fastq.gz",
        source_url="ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR037/ERR037001/ERR037001_1.fastq.gz",
        is_archive=True,
    )
    undownloaded_sample = stored(Sample(), accession_code='ERR037001', organism=zebrafish)
    stored(
        OriginalFileSampleAssociation(),
        sample=undownloaded_sample,
        original_file=undownloaded_file,
    )
    stored(
        ExperimentSampleAssociation(),
        sample=undownloaded_sample,
        experiment=empty_experiment,
    )

    failed_download = stored(
        DownloaderJob(),
        accession_code=undownloaded_sample.accession_code,
        downloader_task="SRA",
        start_time=timezone.now(),
        end_time=timezone.now(),
        success=False,
    )
    stored(
        DownloaderJobOriginalFileAssociation(),
        downloader_job=failed_download,
        original_file=undownloaded_file,
    )

    # --- Experiment that is 50% complete ------------------------------
    half_done_experiment = stored(Experiment(accession_code='ERP036000'), technology='RNA-SEQ')
    ExperimentOrganismAssociation.objects.create(
        organism=zebrafish, experiment=half_done_experiment
    )

    # First sample: already processed.
    finished_processing = stored(
        ProcessorJob(),
        accession_code="ERR036000",
        pipeline_applied="SALMON",
        ram_amount=12288,
        start_time=timezone.now(),
        end_time=timezone.now(),
        success=True,
    )
    processed_file = stored(
        OriginalFile(),
        filename="ERR036000.fastq.gz",
        source_filename="ERR036000.fastq.gz",
        source_url="ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR036/ERR036000/ERR036000_1.fastq.gz",
        is_archive=True,
    )
    processed_sample = stored(Sample(), accession_code='ERR036000', organism=zebrafish)
    stored(
        OriginalFileSampleAssociation(),
        sample=processed_sample,
        original_file=processed_file,
    )
    stored(
        ProcessorJobOriginalFileAssociation(),
        processor_job=finished_processing,
        original_file=processed_file,
    )
    stored(
        ExperimentSampleAssociation(),
        sample=processed_sample,
        experiment=half_done_experiment,
    )

    # Second sample: processing failed.
    unprocessed_file = stored(
        OriginalFile(),
        filename="ERR036001.fastq.gz",
        source_filename="ERR036001.fastq.gz",
        source_url="ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR036/ERR036001/ERR036001_1.fastq.gz",
        is_archive=True,
    )
    unprocessed_sample = stored(Sample(), accession_code='ERR036001', organism=zebrafish)
    stored(
        OriginalFileSampleAssociation(),
        sample=unprocessed_sample,
        original_file=unprocessed_file,
    )
    stored(
        ExperimentSampleAssociation(),
        sample=unprocessed_sample,
        experiment=half_done_experiment,
    )

    failed_processing = stored(
        ProcessorJob(),
        pipeline_applied="SALMON",
        accession_code=unprocessed_sample.accession_code,
        ram_amount=12288,
        start_time=timezone.now(),
        end_time=timezone.now(),
        success=False,
    )
    stored(
        ProcessorJobOriginalFileAssociation(),
        processor_job=failed_processing,
        original_file=unprocessed_file,
    )

    # Fixture complete; run the command under test.
    call_command("organism_shepherd", organism_name="DANIO_RERIO")

    # Each recorded call is a (name, args, kwargs) triple.
    recorded = mock_send_job.mock_calls

    _, first_args, first_kwargs = recorded[0]
    requeued_processor_job = first_kwargs["job"]
    self.assertEqual(first_args[0], ProcessorPipeline.SALMON)
    self.assertEqual(
        requeued_processor_job.pipeline_applied, failed_processing.pipeline_applied
    )
    self.assertEqual(requeued_processor_job.ram_amount, failed_processing.ram_amount)
    self.assertIn(requeued_processor_job.volume_index, active_volumes)

    failed_processing.refresh_from_db()
    self.assertEqual(requeued_processor_job, failed_processing.retried_job)

    _, second_args, second_kwargs = recorded[1]
    requeued_download_job = second_kwargs["job"]
    self.assertEqual(second_args[0], Downloaders.SRA)
    self.assertEqual(requeued_download_job.accession_code, failed_download.accession_code)
    self.assertEqual(requeued_download_job.downloader_task, failed_download.downloader_task)

    failed_download.refresh_from_db()
    self.assertEqual(requeued_download_job, failed_download.retried_job)
def test_dharma(self):
    """Check when ``utils.start_job`` exits the process under ``force_harakiri``.

    Two downloader jobs with a start time and a third without one are
    created, all with this instance's worker id. Starting the third job must
    raise ``SystemExit`` with ``max_downloader_jobs_per_node=2`` and must not
    raise it with ``max_downloader_jobs_per_node=15``.
    """
    dlj1 = DownloaderJob()
    dlj1.accession_code = 'D1'
    dlj1.worker_id = get_instance_id()
    dlj1.start_time = datetime.datetime.now()
    dlj1.save()

    dlj2 = DownloaderJob()
    dlj2.accession_code = 'D2'
    dlj2.worker_id = get_instance_id()
    dlj2.start_time = datetime.datetime.now()
    dlj2.save()

    dlj3 = DownloaderJob()
    dlj3.accession_code = 'D3'
    dlj3.worker_id = get_instance_id()
    dlj3.save()

    original_file = OriginalFile()
    original_file.source_url = "ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MEXP/E-MEXP-433/E-MEXP-433.raw.1.zip"
    original_file.source_filename = "Waldhof_020604_R30_01-2753_U133A.CEL"
    original_file.save()

    assoc = DownloaderJobOriginalFileAssociation()
    assoc.original_file = original_file
    assoc.downloader_job = dlj3
    assoc.save()

    sample = Sample()
    sample.accession_code = 'Blahblahblah'
    sample.technology = "MICROARRAY"
    sample.manufacturer = "AFFYMETRIX"
    sample.has_raw = True
    sample.platform_accession_code = "hgu133a"
    sample.save()

    OriginalFileSampleAssociation.objects.get_or_create(
        sample=sample, original_file=original_file
    )

    # With a limit of 2 the job must exit. assertRaises fails the test if
    # nothing is raised and lets any other exception surface as an error.
    with self.assertRaises(SystemExit):
        utils.start_job(dlj3.id, max_downloader_jobs_per_node=2, force_harakiri=True)

    # With a limit of 15 the job must not exit.
    try:
        utils.start_job(dlj3.id, max_downloader_jobs_per_node=15, force_harakiri=True)
    except SystemExit:
        self.fail("start_job exited even though the per-node job limit was not reached")
    except Exception:
        # Deliberately tolerated, as in the original test: any failure other
        # than SystemExit is acceptable for this second call.
        pass
def setUp(self):
    """Populate the database with the fixture shared by the tests.

    Creates two experiments, 29 organisms, two processed samples, a QN
    target for one organism, job/file/result associations, and a
    transcriptome index. Stores ``self.experiment``, ``self.sample``,
    ``self.homo_sapiens`` and ``self.danio_rerio`` for later use.
    """
    # No user or login is created here; add one if endpoints become protected.

    rnaseq_experiment = Experiment()
    rnaseq_experiment.accession_code = "GSE000"
    rnaseq_experiment.alternate_accession_code = "E-GEOD-000"
    rnaseq_experiment.title = "NONONONO"
    rnaseq_experiment.description = "Boooooourns. Wasabi."
    rnaseq_experiment.technology = "RNA-SEQ"
    rnaseq_experiment.save()

    array_experiment = Experiment()
    array_experiment.accession_code = "GSE123"
    array_experiment.title = "Hey Ho Let's Go"
    array_experiment.description = (
        "This is a very exciting test experiment. Faygo soda. Blah blah blah."
    )
    array_experiment.technology = "MICROARRAY"
    array_experiment.save()
    self.experiment = array_experiment

    annotation = ExperimentAnnotation()
    annotation.data = {"hello": "world", "123": 456}
    annotation.experiment = array_experiment
    annotation.save()

    # 26 test organisms numbered 0-25 for the pagination test, so there are
    # 29 organisms in total once the 3 named ones below are added.
    for index in range(26):
        filler = Organism(
            name="TEST_ORGANISM_{}".format(index), taxonomy_id=1234 + index
        )
        filler.save()

    panda = Organism(
        name="AILUROPODA_MELANOLEUCA", taxonomy_id=9646, is_scientific_name=True
    )
    panda.save()

    self.homo_sapiens = Organism(
        name="HOMO_SAPIENS", taxonomy_id=9606, is_scientific_name=True
    )
    self.homo_sapiens.save()

    self.danio_rerio = Organism(
        name="DANIO_RERIO", taxonomy_id=1337, is_scientific_name=True
    )
    self.danio_rerio.save()

    first_sample = Sample()
    first_sample.title = "123"
    first_sample.accession_code = "123"
    first_sample.is_processed = True
    first_sample.organism = panda
    first_sample.save()

    main_sample = Sample()
    main_sample.title = "789"
    main_sample.accession_code = "789"
    main_sample.is_processed = True
    main_sample.organism = panda
    main_sample.save()
    self.sample = main_sample

    # QN target for the samples' organism.
    qn_result = ComputationalResult()
    qn_result.commands.append("create_qn_target.py")
    qn_result.is_ccdl = True
    qn_result.is_public = True
    qn_result.processor = None
    qn_result.save()

    qn_annotation = ComputationalResultAnnotation()
    qn_annotation.result = qn_result
    qn_annotation.data = {"organism_id": panda.id, "is_qn": True}
    qn_annotation.save()

    panda.qn_target = qn_result
    panda.save()

    main_annotation = SampleAnnotation()
    main_annotation.data = {"goodbye": "world", "789": 123}
    main_annotation.sample = main_sample
    main_annotation.save()

    blank_file = OriginalFile()
    blank_file.save()

    file_sample_link = OriginalFileSampleAssociation()
    file_sample_link.sample = main_sample
    file_sample_link.original_file = blank_file
    file_sample_link.save()

    download_job = DownloaderJob()
    download_job.save()

    download_link = DownloaderJobOriginalFileAssociation()
    download_link.original_file = blank_file
    download_link.downloader_job = download_job
    download_link.save()

    processing_job = ProcessorJob()
    processing_job.save()

    processing_link = ProcessorJobOriginalFileAssociation()
    processing_link.original_file = blank_file
    processing_link.processor_job = processing_job
    processing_link.save()

    experiment_link = ExperimentSampleAssociation()
    experiment_link.sample = main_sample
    experiment_link.experiment = array_experiment
    experiment_link.save()

    array_experiment.num_total_samples = 1
    array_experiment.num_processed_samples = 1
    array_experiment.save()

    # Two plain computational results, each linked to the main sample.
    for _ in range(2):
        plain_result = ComputationalResult()
        plain_result.save()

        result_link = SampleResultAssociation()
        result_link.sample = main_sample
        result_link.result = plain_result
        result_link.save()

    salmon = Processor()
    salmon.name = "Salmon Quant"
    salmon.version = "v9.9.9"
    salmon.docker_image = "dr_salmon"
    salmon.environment = '{"some": "environment"}'
    salmon.save()

    index_result = ComputationalResult(processor=salmon)
    index_result.save()

    short_index = OrganismIndex()
    short_index.index_type = "TRANSCRIPTOME_SHORT"
    short_index.organism = self.danio_rerio
    short_index.result = index_result
    short_index.absolute_directory_path = (
        "/home/user/data_store/salmon_tests/TRANSCRIPTOME_INDEX/SHORT"
    )
    short_index.is_public = True
    short_index.s3_url = "not_blank"
    short_index.save()

    return
def create_downloader_job():
    """Create and save an SRA DownloaderJob linked to two placeholder files.

    Returns the saved job.
    """

    def attach_placeholder_file(owner):
        # The attribute values are arbitrary; only the rows matter.
        placeholder = OriginalFile()
        placeholder.source_filename = "doesn't matter"
        placeholder.filename = "this either"
        placeholder.absolute_file_path = "nor this"
        placeholder.save()

        job_link = DownloaderJobOriginalFileAssociation()
        job_link.original_file = placeholder
        job_link.downloader_job = owner
        job_link.save()

    new_job = DownloaderJob(
        downloader_task="SRA",
        batch_job_id="DEFAULT",
        num_retries=0,
        accession_code="NUNYA",
        success=None,
    )
    new_job.save()

    attach_placeholder_file(new_job)
    attach_placeholder_file(new_job)

    return new_job