def test_deblur_keyerror(self):
    """deblur succeeds even when a fragment received no SEPP placement.

    A feature with an empty placement string is pre-seeded into the
    archive to exercise the code path that previously raised KeyError.
    """
    # Stage a demux input file; register it for cleanup.
    fd, fp = mkstemp(suffix='_seqs.demux')
    close(fd)
    self._clean_up_files.append(fp)
    copyfile('support_files/filtered_5_seqs.demux', fp)

    # Create a prep template on the test study (study 1).
    prep_info_dict = {
        'SKB7.640196': {'description_prep': 'SKB7',
                        'platform': 'Illumina'},
        'SKB8.640193': {'description_prep': 'SKB8',
                        'platform': 'Illumina'}}
    data = {'prep_info': dumps(prep_info_dict),
            # magic #1 = testing study
            'study': 1,
            'data_type': '16S'}
    pid = self.qclient.post('/apitest/prep_template/', data=data)['prep']

    # Attach a Demultiplexed artifact to that prep.
    data = {'filepaths': dumps([(fp, 'preprocessed_demux')]),
            'type': "Demultiplexed",
            'name': "New demultiplexed artifact",
            'prep': pid}
    aid = self.qclient.post('/apitest/artifact/', data=data)['artifact']

    self.params['Demultiplexed sequences'] = aid

    # Register a running processing job for the Deblur command.
    data = {'user': '******',
            'command': dumps(['deblur', '2021.09', 'Deblur']),
            'status': 'running',
            'parameters': dumps(self.params)}
    jid = self.qclient.post('/apitest/processing_job/', data=data)['job']

    out_dir = mkdtemp()
    self._clean_up_files.append(out_dir)

    # Pre-populate the archive with fragment placements, making sure
    # at least one sequence got no placement via SEPP (empty string).
    self.features[('TACGGAGGGTGCAAGCGTTATCCGGATTCACTGGGTTTAAAGGGTGCGTAGGT'
                   'GGGTTGGTAAGTCAGTGGTGAAATCTCCGGGCTTAACTCGGAAACTG')] = ''
    self.qclient.patch(url="/qiita_db/archive/observations/",
                       op="add", path=jid,
                       value=dumps(self.features))

    success, ainfo, msg = deblur(self.qclient, jid, self.params, out_dir)
    self.assertEqual("", msg)
    self.assertTrue(success)
def test_deblur(self):
    """End-to-end run of the modern Deblur command (version 1.0.4).

    Verifies the two BIOM outputs and their attached file lists.
    NOTE(review): another ``test_deblur`` appears later in this file;
    if both live in the same TestCase the later definition shadows this
    one — confirm against the full file.
    """
    # Stage a demux input file; register it for cleanup.
    fd, fp = mkstemp(suffix='_seqs.demux')
    close(fd)
    self._clean_up_files.append(fp)
    copyfile('support_files/filtered_5_seqs.demux', fp)

    # Create a prep template on the test study (study 1).
    prep_info_dict = {
        'SKB7.640196': {'description_prep': 'SKB7'},
        'SKB8.640193': {'description_prep': 'SKB8'}}
    data = {'prep_info': dumps(prep_info_dict),
            # magic #1 = testing study
            'study': 1,
            'data_type': '16S'}
    pid = self.qclient.post('/apitest/prep_template/', data=data)['prep']

    # Attach a Demultiplexed artifact to that prep.
    data = {'filepaths': dumps([(fp, 'preprocessed_fastq')]),
            'type': "Demultiplexed",
            'name': "New demultiplexed artifact",
            'prep': pid}
    aid = self.qclient.post('/apitest/artifact/', data=data)['artifact']

    self.params['Demultiplexed sequences'] = aid

    # Register a running processing job for the Deblur command.
    data = {'user': '******',
            'command': dumps(['deblur', '1.0.4', 'Deblur']),
            'status': 'running',
            'parameters': dumps(self.params)}
    jid = self.qclient.post('/apitest/processing_job/', data=data)['job']

    out_dir = mkdtemp()
    self._clean_up_files.append(out_dir)

    # Pre-populate the archive with fragment placements.
    self.qclient.patch(url="/qiita_db/archive/observations/",
                       op="add", path=jid,
                       value=dumps(self.features))

    success, ainfo, msg = deblur(self.qclient, jid, self.params, out_dir)
    self.assertEqual("", msg)
    self.assertTrue(success)

    # Both artifacts are BIOM; check their file manifests.
    self.assertEqual("BIOM", ainfo[0].artifact_type)
    self.assertEqual("BIOM", ainfo[1].artifact_type)
    self.assertEqual(
        [(join(out_dir, 'deblur_out', 'all.biom'), 'biom'),
         (join(out_dir, 'deblur_out', 'all.seqs.fa'),
          'preprocessed_fasta')],
        ainfo[0].files)
    self.assertEqual(
        [(join(out_dir, 'deblur_out', 'reference-hit.biom'), 'biom'),
         (join(out_dir, 'deblur_out', 'reference-hit.seqs.fa'),
          'preprocessed_fasta'),
         (None, 'plain_text')],
        ainfo[1].files)
def test_deblur_failing_sepp(self):
    """deblur reports failure when the SEPP binary exits non-zero.

    A fake ``run-sepp.sh`` that always exits 123 is prepended to PATH.
    """
    # Stage a demux input file; register it for cleanup.
    fd, fp = mkstemp(suffix='_seqs.demux')
    close(fd)
    self._clean_up_files.append(fp)
    copyfile('support_files/filtered_5_seqs.demux', fp)

    # Create a prep template on the test study (study 1).
    prep_info_dict = {
        'SKB7.640196': {'description_prep': 'SKB7',
                        'platform': 'Illumina'},
        'SKB8.640193': {'description_prep': 'SKB8',
                        'platform': 'Illumina'}}
    data = {'prep_info': dumps(prep_info_dict),
            # magic #1 = testing study
            'study': 1,
            'data_type': '16S'}
    pid = self.qclient.post('/apitest/prep_template/', data=data)['prep']

    # Attach a Demultiplexed artifact to that prep.
    data = {'filepaths': dumps([(fp, 'preprocessed_demux')]),
            'type': "Demultiplexed",
            'name': "New demultiplexed artifact",
            'prep': pid}
    aid = self.qclient.post('/apitest/artifact/', data=data)['artifact']

    self.params['Demultiplexed sequences'] = aid

    # Register a running processing job for the Deblur command.
    data = {'user': '******',
            'command': dumps(['deblur', '2021.09', 'Deblur']),
            'status': 'running',
            'parameters': dumps(self.params)}
    jid = self.qclient.post('/apitest/processing_job/', data=data)['job']

    out_dir = mkdtemp()
    self._clean_up_files.append(out_dir)

    # Create a fake sepp binary that will always fail, and put it
    # ahead of the real one on PATH.
    fp_fake_sepp = join(out_dir, 'run-sepp.sh')
    with open(fp_fake_sepp, 'w') as f:
        f.write('#!/bin/bash\nexit 123\n')
    chmod(fp_fake_sepp, 0o775)
    environ['PATH'] = '%s:%s' % (out_dir, self.oldpath)

    success, ainfo, msg = deblur(self.qclient, jid, self.params, out_dir)
    self.assertFalse(success)
    self.assertEqual(ainfo, None)
    self.assertIn('Error running run-sepp.sh', msg)
def test_deblur_no_target_gene(self):
    """deblur rejects non-amplicon (Metagenomic) data with an error."""
    # Stage a demux input file; register it for cleanup.
    fd, fp = mkstemp(suffix='_seqs.demux')
    close(fd)
    self._clean_up_files.append(fp)
    copyfile('support_files/no_sepp_seqs.demux', fp)

    # Prep template on the test study, but with a Metagenomic data
    # type — the value that must trigger the rejection.
    prep_info_dict = {
        'SKB7.640196': {'description_prep': 'SKB7',
                        'platform': 'Illumina'},
        'SKB8.640193': {'description_prep': 'SKB8',
                        'platform': 'Illumina'}}
    data = {'prep_info': dumps(prep_info_dict),
            # magic #1 = testing study
            'study': 1,
            'data_type': 'Metagenomic'}
    pid = self.qclient.post('/apitest/prep_template/', data=data)['prep']

    # Attach a Demultiplexed artifact to that prep.
    data = {'filepaths': dumps([(fp, 'preprocessed_fastq')]),
            'type': "Demultiplexed",
            'name': "New demultiplexed artifact",
            'prep': pid}
    aid = self.qclient.post('/apitest/artifact/', data=data)['artifact']

    self.params['Demultiplexed sequences'] = aid

    # Register a running processing job for the Deblur command.
    data = {'user': '******',
            'command': dumps(['deblur', '2021.09', 'Deblur']),
            'status': 'running',
            'parameters': dumps(self.params)}
    jid = self.qclient.post('/apitest/processing_job/', data=data)['job']

    out_dir = mkdtemp()
    self._clean_up_files.append(out_dir)

    # Pre-populate the archive with fragment placements.
    self.qclient.patch(url="/qiita_db/archive/observations/",
                       op="add", path=jid,
                       value=dumps(self.features))

    success, ainfo, msg = deblur(self.qclient, jid, self.params, out_dir)
    self.assertEqual(
        'deblur was developed only for amplicon sequencing data', msg)
    self.assertFalse(success)
def test_no_valid_values_platform_error(self):
    """deblur rejects preps whose ``platform`` values are not Illumina."""
    # Stage a demux input file; register it for cleanup.
    fd, fp = mkstemp(suffix='_seqs.demux')
    close(fd)
    self._clean_up_files.append(fp)
    copyfile('support_files/filtered_5_seqs.demux', fp)

    # Prep template with deliberately invalid platform values.
    prep_info_dict = {
        'SKB7.640196': {'description_prep': 'SKB7',
                        'platform': 'foo'},
        'SKB8.640193': {'description_prep': 'SKB8',
                        'platform': 'bar'}}
    data = {'prep_info': dumps(prep_info_dict),
            # magic #1 = testing study
            'study': 1,
            'data_type': '16S'}
    pid = self.qclient.post('/apitest/prep_template/', data=data)['prep']

    # Attach a Demultiplexed artifact to that prep.
    data = {'filepaths': dumps([(fp, 'preprocessed_demux')]),
            'type': "Demultiplexed",
            'name': "New demultiplexed artifact",
            'prep': pid}
    aid = self.qclient.post('/apitest/artifact/', data=data)['artifact']

    self.params['Demultiplexed sequences'] = aid

    # Register a running processing job for the Deblur command.
    data = {'user': '******',
            'command': dumps(['deblur', '2021.09', 'Deblur']),
            'status': 'running',
            'parameters': dumps(self.params)}
    jid = self.qclient.post('/apitest/processing_job/', data=data)['job']

    out_dir = mkdtemp()
    self._clean_up_files.append(out_dir)

    success, ainfo, msg = deblur(self.qclient, jid, self.params, out_dir)
    self.assertEqual(
        'deblur is only valid for Illumina `platform`, '
        'current values in the Preparation Information File: '
        'bar, foo', msg)
    self.assertFalse(success)
def test_deblur(self):
    """End-to-end run of the legacy deblur-workflow command (0.1.0).

    Checks the two BIOM artifacts (``final.biom`` and the 16S-only
    table) and their file manifests.
    NOTE(review): another ``test_deblur`` appears earlier in this file;
    if both live in the same TestCase only this later definition runs —
    consider renaming one after confirming against the full file.
    """
    # Stage a demux input file; register it for cleanup.
    fd, fp = mkstemp(suffix='_seqs.demux')
    close(fd)
    self._clean_up_files.append(fp)
    copyfile('support_files/filtered_5_seqs.demux', fp)

    # Create a prep template on the test study (study 1).
    prep_info_dict = {
        'SKB7.640196': {'description_prep': 'SKB7'},
        'SKB8.640193': {'description_prep': 'SKB8'}}
    data = {'prep_info': dumps(prep_info_dict),
            # magic #1 = testing study
            'study': 1,
            'data_type': '16S'}
    pid = self.qclient.post('/apitest/prep_template/', data=data)['prep']

    # Attach a Demultiplexed artifact to that prep.
    data = {'filepaths': dumps([(fp, 'preprocessed_fastq')]),
            'type': "Demultiplexed",
            'name': "New demultiplexed artifact",
            'prep': pid}
    aid = self.qclient.post('/apitest/artifact/', data=data)['artifact']

    # Legacy parameter name for the input artifact.
    self.params['seqs-fp'] = aid

    # Register a running processing job for the legacy command.
    data = {'user': '******',
            'command': dumps(['deblur', '0.1.0', 'deblur-workflow']),
            'status': 'running',
            'parameters': dumps(self.params)}
    jid = self.qclient.post('/apitest/processing_job/', data=data)['job']

    out_dir = mkdtemp()
    self._clean_up_files.append(out_dir)

    success, ainfo, msg = deblur(self.qclient, jid, self.params, out_dir)
    self.assertEqual("", msg)
    self.assertTrue(success)

    # Both artifacts are BIOM; check their file manifests.
    self.assertEqual("BIOM", ainfo[0].artifact_type)
    self.assertEqual("BIOM", ainfo[1].artifact_type)
    self.assertEqual(
        [(join(out_dir, 'deblur_out', 'final.biom'), 'biom'),
         (join(out_dir, 'deblur_out', 'final.seqs.fa'),
          'preprocessed_fasta')],
        ainfo[0].files)
    self.assertEqual(
        [(join(out_dir, 'deblur_out', 'final.only-16s.biom'), 'biom'),
         (join(out_dir, 'deblur_out', 'final.seqs.fa.no_artifacts'),
          'preprocessed_fasta')],
        ainfo[1].files)
def test_fragment_archiving(self):
    """Round-trip fragment placements through the Qiita archive.

    Pre-populates the archive with most placements from a prior SEPP
    run, runs deblur (which triggers SEPP for the held-out fragments
    against the 'tiny' reference), and verifies both pre-populated and
    freshly computed placements afterwards.
    """
    # Stage a demux input file; register it for cleanup.
    fd, fp = mkstemp(suffix='_seqs.demux')
    close(fd)
    self._clean_up_files.append(fp)
    copyfile('support_files/filtered_5_seqs.demux', fp)

    # Create a prep template on the test study (study 1).
    prep_info_dict = {
        'SKB7.640196': {'description_prep': 'SKB7'},
        'SKB8.640193': {'description_prep': 'SKB8'}}
    data = {'prep_info': dumps(prep_info_dict),
            # magic #1 = testing study
            'study': 1,
            'data_type': '16S'}
    pid = self.qclient.post('/apitest/prep_template/', data=data)['prep']

    # Attach a Demultiplexed artifact to that prep.
    data = {'filepaths': dumps([(fp, 'preprocessed_demux')]),
            'type': "Demultiplexed",
            'name': "New demultiplexed artifact",
            'prep': pid}
    aid = self.qclient.post('/apitest/artifact/', data=data)['artifact']

    self.params['Demultiplexed sequences'] = aid

    # Register a running processing job for the Deblur command.
    data = {'user': '******',
            'command': dumps(['deblur', '1.0.4', 'Deblur']),
            'status': 'running',
            'parameters': dumps(self.params)}
    jid = self.qclient.post('/apitest/processing_job/', data=data)['job']

    # Populate the Qiita archive with some precomputed placements.
    # placements.json is output from a SEPP run for the resulting
    # Deblur table, but with the "tree" value removed for the sake of
    # space.
    features = dict()
    with open('support_files/sepp/placements.json', 'r') as f:
        for placement in json.load(f)['placements']:
            fragment = placement['nm'][0][0]
            # exclude 10 sequences to trigger SEPP computation later on
            if fragment not in self.novel_seqs:
                features[fragment] = json.dumps(placement['p'])
    # add in a feature which should be rejected by SEPP
    features['A' * len(self.novel_seqs[0])] = ""

    # 1) the archive must start out empty for this job.
    observations = self.qclient.post(
        "/qiita_db/archive/observations/",
        data={'job_id': jid, 'features': list(features.keys())})
    self.assertTrue(len(observations.keys()) == 0)

    # 2) insert placements into the archive ...
    self.qclient.patch(url="/qiita_db/archive/observations/",
                       op="add", path=jid,
                       value=json.dumps(features))
    # ... and check that the archive now holds those placements.
    observations = self.qclient.post(
        "/qiita_db/archive/observations/",
        data={'job_id': jid, 'features': list(features.keys())})
    self.assertTrue(len(observations.keys()) == len(features.keys()))

    # 3) execute the deblur job with a subsequent SEPP run against the
    # tiny reference.
    out_dir = mkdtemp()
    self._clean_up_files.append(out_dir)
    self.params['Reference phylogeny for SEPP'] = 'tiny'
    success, ainfo, msg = deblur(self.qclient, jid, self.params, out_dir)
    self.assertEqual("", msg)
    self.assertTrue(success)

    # The number of stored placements must not have shrunk.
    observations_2 = self.qclient.post(
        "/qiita_db/archive/observations/",
        data={'job_id': jid,
              'features': list(features.keys()) + [self.novel_seqs[0]]})
    self.assertTrue(len(observations.keys()) <= len(observations_2.keys()))

    self.maxDiff = None
    # Specific placement values for one fragment that was
    # pre-populated ...
    exp_placement = (
        '[[226990, -15902.052, 0.14311954, 9.856619e-06, 6.113515e-06], '
        '[226989, -15902.052, 0.14311936, 7.0000096e-06, 6.113515e-06], '
        '[226993, -15902.052, 0.14311917, 8.61664e-06, 6.113515e-06], '
        '[226991, -15902.052, 0.14311911, 6.3553584e-06, 6.113515e-06], '
        '[227443, -15902.052, 0.14311688, 6.7868327e-06, 6.113515e-06], '
        '[226994, -15902.052, 0.14311177, 5.000002e-07, 6.113515e-06], '
        '[227452, -15902.064, 0.14129417, 0.00160019, 6.113515e-06]]')
    self.assertEqual(
        observations_2[(
            'TACGTAGGGCGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCA'
            'CGTCGGATGTGAAAGCCCGGGGCTTAACCCCGGGTCTG')],
        exp_placement)
    # ... and one fragment that was recomputed via SEPP during this
    # test.
    exp_placement = (
        '[[78, -18489.055, 0.8486466, 0.015792055, 6.113515e-06], '
        '[74, -18491.146, 0.10484001, 0.017408343, 0.010122812], '
        '[77, -18491.959, 0.046513416, 0.015838308, 0.010945947]]')
    self.assertEqual(observations_2[self.novel_seqs[0]], exp_placement)