def write_demux_files(self, prep_template, generate_hdf5=True):
    """Writes a demux test file to avoid duplication of code"""
    fna_fp = join(self.temp_dir, 'seqs.fna')
    demux_fp = join(self.temp_dir, 'demux.seqs')
    if generate_hdf5:
        with open(fna_fp, 'w') as f:
            f.write(FASTA_EXAMPLE)
        with File(demux_fp, "w") as f:
            to_hdf5(fna_fp, f)
    else:
        with open(demux_fp, 'w') as f:
            f.write('')

    if prep_template.artifact is None:
        ppd = Artifact.create(
            [(demux_fp, 6)], "Demultiplexed", prep_template=prep_template,
            can_be_submitted_to_ebi=True, can_be_submitted_to_vamps=True)
    else:
        params = Parameters.from_default_params(
            DefaultParameters(1), {'input_data': prep_template.artifact.id})
        ppd = Artifact.create(
            [(demux_fp, 6)], "Demultiplexed",
            parents=[prep_template.artifact],
            processing_parameters=params,
            can_be_submitted_to_ebi=True, can_be_submitted_to_vamps=True)
    return ppd
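# A minimal usage sketch for the helper above, as it might appear inside a
# test method; the prep template ID is hypothetical. With the default
# generate_hdf5=True the demux file gets real HDF5 content; with
# generate_hdf5=False it is left empty, which is enough for tests that only
# need the artifact row to exist.
pt = PrepTemplate(1)  # assumes prep template 1 exists in the test DB
ppd = self.write_demux_files(pt)
self.assertEqual(ppd.artifact_type, "Demultiplexed")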
def write_demux_files(self, prep_template, generate_hdf5=True):
    """Writes a demux test file to avoid duplication of code"""
    fna_fp = join(self.temp_dir, 'seqs.fna')
    demux_fp = join(self.temp_dir, 'demux.seqs')
    if generate_hdf5:
        with open(fna_fp, 'w') as f:
            f.write(FASTA_EXAMPLE)
        with File(demux_fp, "w") as f:
            to_hdf5(fna_fp, f)
    else:
        with open(demux_fp, 'w') as f:
            f.write('')

    if prep_template.artifact is None:
        ppd = Artifact.create([(demux_fp, 6)], "Demultiplexed",
                              prep_template=prep_template)
    else:
        params = Parameters.from_default_params(
            DefaultParameters(1), {'input_data': prep_template.artifact.id})
        ppd = Artifact.create([(demux_fp, 6)], "Demultiplexed",
                              parents=[prep_template.artifact],
                              processing_parameters=params)
    return ppd
def setUp(self):
    uploads_path = get_mountpoint('uploads')[0][1]
    # Create prep test file to point at
    self.update_fp = join(uploads_path, '1', 'update.txt')
    with open(self.update_fp, 'w') as f:
        f.write("""sample_name\tnew_col\n1.SKD6.640190\tnew_value\n""")
    self._files_to_remove = [self.update_fp]

    # creating temporary files and artifact
    # NOTE: we don't need to remove the artifact created because it's
    # used to test the delete functionality
    fd, fp = mkstemp(suffix='_seqs.fna')
    close(fd)
    with open(fp, 'w') as f:
        f.write(">1.sid_r4_0 M02034:17:000000000-A5U18:1:1101:15370:1394 "
                "1:N:0:1 orig_bc=CATGAGCT new_bc=CATGAGCT bc_diffs=0\n"
                "GTGTGCCAGCAGCCGCGGTAATACGTAGGG\n")
    # 4: Demultiplexed
    filepaths_processed = [(fp, 4)]
    # 1 for default parameters and input data
    exp_params = Parameters.from_default_params(DefaultParameters(1),
                                                {'input_data': 1})
    self.artifact = Artifact.create(filepaths_processed, "Demultiplexed",
                                    parents=[Artifact(1)],
                                    processing_parameters=exp_params)
def post(self, study_id, prep_id):
    study = self.safe_get_study(study_id)
    if study is None:
        return

    prep_id = to_int(prep_id)
    try:
        p = PrepTemplate(prep_id)
    except QiitaDBUnknownIDError:
        self.fail('Preparation not found', 404)
        return

    if p.study_id != study.id:
        self.fail('Preparation ID not associated with the study', 409)
        return

    artifact_deets = json_decode(self.request.body)
    _, upload = get_mountpoint('uploads')[0]
    base = os.path.join(upload, study_id)
    filepaths = [(os.path.join(base, fp), fp_type)
                 for fp, fp_type in artifact_deets['filepaths']]

    try:
        art = Artifact.create(filepaths,
                              artifact_deets['artifact_type'],
                              artifact_deets['artifact_name'],
                              p)
    except QiitaError as e:
        self.fail(str(e), 406)
        return

    self.write({'id': art.id})
    self.set_status(201)
    self.finish()
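# A sketch of the JSON body the handler above expects, as implied by
# json_decode and the keys it reads; the file names and filepath types here
# are hypothetical. Each filepath is relative to the study's upload folder.
import json

body = json.dumps({
    'filepaths': [['seqs.fastq.gz', 'raw_forward_seqs'],
                  ['barcodes.fastq.gz', 'raw_barcodes']],
    'artifact_type': 'FASTQ',
    'artifact_name': 'new artifact'})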
def test_delete_analysis(self):
    # adding extra filepaths to make sure the delete works as expected, we
    # basically want 8 -> 9 -> 10 -> 12 -> 14
    #                       -> 11 -> 13
    fd, fp10 = mkstemp(suffix='_table.biom')
    close(fd)
    fd, fp11 = mkstemp(suffix='_table.biom')
    close(fd)
    fd, fp12 = mkstemp(suffix='_table.biom')
    close(fd)
    fd, fp13 = mkstemp(suffix='_table.biom')
    close(fd)
    fd, fp14 = mkstemp(suffix='_table.biom')
    close(fd)
    with biom_open(fp10, 'w') as f:
        et.to_hdf5(f, "test")
    with biom_open(fp11, 'w') as f:
        et.to_hdf5(f, "test")
    with biom_open(fp12, 'w') as f:
        et.to_hdf5(f, "test")
    with biom_open(fp13, 'w') as f:
        et.to_hdf5(f, "test")
    with biom_open(fp14, 'w') as f:
        et.to_hdf5(f, "test")
    self._clean_up_files.extend([fp10, fp11, fp12, fp13, fp14])

    # copying some processing parameters
    a9 = Artifact(9)
    pp = a9.processing_parameters

    # 7: BIOM
    a10 = Artifact.create([(fp10, 7)], "BIOM", parents=[a9],
                          processing_parameters=pp)
    a11 = Artifact.create([(fp11, 7)], "BIOM", parents=[a9],
                          processing_parameters=pp)
    a12 = Artifact.create([(fp12, 7)], "BIOM", parents=[a10],
                          processing_parameters=pp)
    Artifact.create([(fp13, 7)], "BIOM", parents=[a11],
                    processing_parameters=pp)
    Artifact.create([(fp14, 7)], "BIOM", parents=[a12],
                    processing_parameters=pp)

    job = self._create_job('delete_analysis', {'analysis_id': 1})
    private_task(job.id)
    self.assertEqual(job.status, 'success')
    with self.assertRaises(QiitaDBUnknownIDError):
        Analysis(1)
def create_raw_data(artifact_type, prep_template, filepaths, name=None):
    """Creates a new raw data

    Needs to be dispatchable because it moves large files

    Parameters
    ----------
    artifact_type: str
        The artifact type
    prep_template : qiita_db.metadata_template.prep_template.PrepTemplate
        The template to attach the artifact
    filepaths : list of (str, str)
        The list with filepaths and their filepath types
    name : str, optional
        The name of the new artifact

    Returns
    -------
    dict of {str: str}
        A dict of the form {'status': str, 'message': str}
    """
    from qiita_db.artifact import Artifact

    status = 'success'
    msg = ''
    try:
        Artifact.create(filepaths, artifact_type, name=name,
                        prep_template=prep_template)
    except Exception as e:
        # We should hit this exception rarely (that's why it is an
        # exception) since at this point we have done multiple checks.
        # However, it can occur in weird cases, so better let the GUI know
        # that this failed
        return {'status': 'danger',
                'message': "Error creating artifact: %s" % str(e)}

    return {'status': status, 'message': msg}
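# A minimal sketch of calling create_raw_data above; the prep template ID,
# upload path, and filepath type are hypothetical. Only the call shape and
# the returned {'status': ..., 'message': ...} dict follow from the
# docstring.
prep = PrepTemplate(1)  # assumes prep template 1 exists in the test DB
result = create_raw_data(
    'FASTQ', prep, [('/path/to/uploads/1/seqs.fastq.gz',
                     'raw_forward_seqs')])
if result['status'] != 'success':
    print(result['message'])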
def test_submit_to_EBI(self):
    # setting up test
    fna_fp = join(self.temp_dir, 'seqs.fna')
    demux_fp = join(self.temp_dir, 'demux.seqs')
    with open(fna_fp, 'w') as f:
        f.write(FASTA_EXAMPLE)
    with File(demux_fp, "w") as f:
        to_hdf5(fna_fp, f)

    pt = PrepTemplate(1)
    params = Parameters.from_default_params(
        DefaultParameters(1), {'input_data': pt.artifact.id})
    artifact = Artifact.create(
        [(demux_fp, 6)], "Demultiplexed", parents=[pt.artifact],
        processing_parameters=params)

    # submit job
    job = self._create_job('submit_to_EBI', {
        'artifact': artifact.id, 'submission_type': 'VALIDATE'})
    job._set_status('in_construction')
    job.submit()

    # wait for the job to fail, and check that the status is submitting
    checked_submitting = True
    while job.status != 'error':
        if checked_submitting:
            self.assertEqual('submitting',
                             artifact.study.ebi_submission_status)
            checked_submitting = False
        # once it fails, wait a few seconds before checking the status again
        sleep(5)
    exp = 'Some artifact submissions failed: %d' % artifact.id
    obs = artifact.study.ebi_submission_status
    self.assertEqual(obs, exp)
    # make sure that the error is correct; we have 2 options
    if environ.get('ASPERA_SCP_PASS', '') != '':
        self.assertIn('1.SKM2.640199', job.log.msg)
    else:
        self.assertIn('ASCP Error:', job.log.msg)
    # wait for everything to finish to avoid DB deadlocks
    sleep(5)
def generate_new_study_with_preprocessed_data(self):
    """Creates a new study up to the processed data for testing"""
    info = {
        "timeseries_type_id": 1,
        "metadata_complete": True,
        "mixs_compliant": True,
        "number_samples_collected": 3,
        "number_samples_promised": 3,
        "study_alias": "Test EBI",
        "study_description": "Study for testing EBI",
        "study_abstract": "Study for testing EBI",
        "emp_person_id": StudyPerson(2),
        "principal_investigator_id": StudyPerson(3),
        "lab_person_id": StudyPerson(1),
    }
    study = Study.create(User("*****@*****.**"), "Test EBI study",
                         [1], info)
    metadata_dict = {
        "Sample1": {
            "collection_timestamp": datetime(2015, 6, 1, 7, 0, 0),
            "physical_specimen_location": "location1",
            "taxon_id": 9606,
            "scientific_name": "homo sapiens",
            "Description": "Test Sample 1",
        },
        "Sample2": {
            "collection_timestamp": datetime(2015, 6, 2, 7, 0, 0),
            "physical_specimen_location": "location1",
            "taxon_id": 9606,
            "scientific_name": "homo sapiens",
            "Description": "Test Sample 2",
        },
        "Sample3": {
            "collection_timestamp": datetime(2015, 6, 3, 7, 0, 0),
            "physical_specimen_location": "location1",
            "taxon_id": 9606,
            "scientific_name": "homo sapiens",
            "Description": "Test Sample 3",
        },
    }
    metadata = pd.DataFrame.from_dict(metadata_dict, orient="index",
                                      dtype=str)
    SampleTemplate.create(metadata, study)
    metadata_dict = {
        "Sample1": {
            "primer": "GTGCCAGCMGCCGCGGTAA",
            "barcode": "CGTAGAGCTCTC",
            "center_name": "KnightLab",
            "platform": "ILLUMINA",
            "instrument_model": "Illumina MiSeq",
            "library_construction_protocol": "Protocol ABC",
            "experiment_design_description": "Random value 1",
        },
        "Sample2": {
            "primer": "GTGCCAGCMGCCGCGGTAA",
            "barcode": "CGTAGAGCTCTA",
            "center_name": "KnightLab",
            "platform": "ILLUMINA",
            "instrument_model": "Illumina MiSeq",
            "library_construction_protocol": "Protocol ABC",
            "experiment_design_description": "Random value 2",
        },
        "Sample3": {
            "primer": "GTGCCAGCMGCCGCGGTAA",
            "barcode": "CGTAGAGCTCTT",
            "center_name": "KnightLab",
            "platform": "ILLUMINA",
            "instrument_model": "Illumina MiSeq",
            "library_construction_protocol": "Protocol ABC",
            "experiment_design_description": "Random value 3",
        },
    }
    metadata = pd.DataFrame.from_dict(metadata_dict, orient="index",
                                      dtype=str)
    pt = PrepTemplate.create(metadata, study, "16S", "Metagenomics")
    fna_fp = join(self.temp_dir, "seqs.fna")
    demux_fp = join(self.temp_dir, "demux.seqs")
    with open(fna_fp, "w") as f:
        f.write(FASTA_EXAMPLE_2.format(study.id))
    with File(demux_fp, "w") as f:
        to_hdf5(fna_fp, f)

    ppd = Artifact.create([(demux_fp, 6)], "Demultiplexed",
                          prep_template=pt)
    return ppd
def generate_new_study_with_preprocessed_data(self):
    """Creates a new study up to the processed data for testing"""
    info = {
        "timeseries_type_id": 1,
        "metadata_complete": True,
        "mixs_compliant": True,
        "number_samples_collected": 3,
        "number_samples_promised": 3,
        "study_alias": "Test EBI",
        "study_description": "Study for testing EBI",
        "study_abstract": "Study for testing EBI",
        "emp_person_id": StudyPerson(2),
        "principal_investigator_id": StudyPerson(3),
        "lab_person_id": StudyPerson(1)
    }
    study = Study.create(User('*****@*****.**'), "Test EBI study", info)
    metadata_dict = {
        'Sample1': {
            'collection_timestamp': datetime(2015, 6, 1, 7, 0, 0),
            'physical_specimen_location': 'location1',
            'taxon_id': 9606,
            'scientific_name': 'homo sapiens',
            'Description': 'Test Sample 1'
        },
        'Sample2': {
            'collection_timestamp': datetime(2015, 6, 2, 7, 0, 0),
            'physical_specimen_location': 'location1',
            'taxon_id': 9606,
            'scientific_name': 'homo sapiens',
            'Description': 'Test Sample 2'
        },
        'Sample3': {
            'collection_timestamp': datetime(2015, 6, 3, 7, 0, 0),
            'physical_specimen_location': 'location1',
            'taxon_id': 9606,
            'scientific_name': 'homo sapiens',
            'Description': 'Test Sample 3'
        }
    }
    metadata = pd.DataFrame.from_dict(metadata_dict, orient='index',
                                      dtype=str)
    SampleTemplate.create(metadata, study)
    metadata_dict = {
        'Sample1': {
            'primer': 'GTGCCAGCMGCCGCGGTAA',
            'barcode': 'CGTAGAGCTCTC',
            'center_name': 'KnightLab',
            'platform': 'ILLUMINA',
            'instrument_model': 'Illumina MiSeq',
            'library_construction_protocol': 'Protocol ABC',
            'experiment_design_description': "Random value 1"
        },
        'Sample2': {
            'primer': 'GTGCCAGCMGCCGCGGTAA',
            'barcode': 'CGTAGAGCTCTA',
            'center_name': 'KnightLab',
            'platform': 'ILLUMINA',
            'instrument_model': 'Illumina MiSeq',
            'library_construction_protocol': 'Protocol ABC',
            'experiment_design_description': "Random value 2"
        },
        'Sample3': {
            'primer': 'GTGCCAGCMGCCGCGGTAA',
            'barcode': 'CGTAGAGCTCTT',
            'center_name': 'KnightLab',
            'platform': 'ILLUMINA',
            'instrument_model': 'Illumina MiSeq',
            'library_construction_protocol': 'Protocol ABC',
            'experiment_design_description': "Random value 3"
        },
    }
    metadata = pd.DataFrame.from_dict(metadata_dict, orient='index',
                                      dtype=str)
    pt = PrepTemplate.create(metadata, study, "16S", 'Metagenomics')
    fna_fp = join(self.temp_dir, 'seqs.fna')
    demux_fp = join(self.temp_dir, 'demux.seqs')
    with open(fna_fp, 'w') as f:
        f.write(FASTA_EXAMPLE_2.format(study.id))
    with File(demux_fp, 'w') as f:
        to_hdf5(fna_fp, f)

    ppd = Artifact.create([(demux_fp, 6)], "Demultiplexed",
                          prep_template=pt)
    return ppd
def test_download_study(self):
    tmp_dir = mkdtemp()
    self._clean_up_files.append(tmp_dir)

    biom_fp = join(tmp_dir, 'otu_table.biom')
    smr_dir = join(tmp_dir, 'sortmerna_picked_otus')
    log_dir = join(smr_dir, 'seqs_otus.log')
    tgz = join(tmp_dir, 'sortmerna_picked_otus.tgz')

    with biom_open(biom_fp, 'w') as f:
        et.to_hdf5(f, "test")
    makedirs(smr_dir)
    with open(log_dir, 'w') as f:
        f.write('\n')
    with open(tgz, 'w') as f:
        f.write('\n')

    files_biom = [(biom_fp, 'biom'), (smr_dir, 'directory'), (tgz, 'tgz')]

    params = Parameters.from_default_params(
        next(Command(3).default_parameter_sets), {'input_data': 1})
    a = Artifact.create(files_biom, "BIOM", parents=[Artifact(2)],
                        processing_parameters=params)
    for x in a.filepaths:
        self._clean_up_files.append(x['fp'])

    response = self.get('/download_study_bioms/1')
    self.assertEqual(response.code, 200)
    exp = (
        '1579715020 1256812 /protected/processed_data/1_study_1001_closed_'
        'reference_otu_table.biom processed_data/1_study_1001_closed_'
        'reference_otu_table.biom\n'
        '- [0-9]* /protected/templates/1_prep_1_qiime_[0-9]*-'
        '[0-9]*.txt mapping_files/4_mapping_file.txt\n'
        '1579715020 1256812 /protected/processed_data/'
        '1_study_1001_closed_reference_otu_table.biom processed_data/'
        '1_study_1001_closed_reference_otu_table.biom\n'
        '- [0-9]* /protected/templates/1_prep_1_qiime_[0-9]*-'
        '[0-9]*.txt mapping_files/5_mapping_file.txt\n'
        '1579715020 1256812 /protected/processed_data/'
        '1_study_1001_closed_reference_otu_table_Silva.biom processed_data'
        '/1_study_1001_closed_reference_otu_table_Silva.biom\n'
        '- [0-9]* /protected/templates/1_prep_1_qiime_[0-9]*-'
        '[0-9]*.txt mapping_files/6_mapping_file.txt\n'
        '1756512010 1093210 /protected/BIOM/7/biom_table.biom '
        'BIOM/7/biom_table.biom\n'
        '- [0-9]* /protected/templates/1_prep_2_qiime_[0-9]*-'
        '[0-9]*.txt mapping_files/7_mapping_file.txt\n'
        '[0-9]* [0-9]* /protected/BIOM/{0}/otu_table.biom '
        'BIOM/{0}/otu_table.biom\n'
        '- 1 /protected/BIOM/{0}/sortmerna_picked_otus/seqs_otus.log '
        'BIOM/{0}/sortmerna_picked_otus/seqs_otus.log\n'
        '- [0-9]* /protected/templates/1_prep_1_qiime_[0-9]*-[0-9]*.'
        'txt mapping_files/{0}_mapping_file.txt\n'.format(a.id))
    self.assertRegex(response.body.decode('ascii'), exp)

    response = self.get('/download_study_bioms/200')
    self.assertEqual(response.code, 405)

    # changing user so we can test the failures
    BaseHandler.get_current_user = Mock(
        return_value=User("*****@*****.**"))
    response = self.get('/download_study_bioms/1')
    self.assertEqual(response.code, 405)

    a.visibility = 'public'
    response = self.get('/download_study_bioms/1')
    # restore visibility
    a.visibility = 'private'
    self.assertEqual(response.code, 200)
    # we should have the same files as the previous test, except the
    # artifact and mapping file 7 (positions 6 and 7); thus removing
    # index 6 twice
    exp = exp.split('\n')
    exp.pop(6)
    exp.pop(6)
    exp = '\n'.join(exp)
    self.assertRegex(response.body.decode('ascii'), exp)
def artifact_post_req(user_id, filepaths, artifact_type, name,
                      prep_template_id, artifact_id=None):
    """Creates the initial artifact for the prep template

    Parameters
    ----------
    user_id : str
        User adding the artifact
    filepaths : dict of str
        Comma-separated list of files to attach to the artifact, keyed by
        file type
    artifact_type : str
        The type of the artifact
    name : str
        Name to give the artifact
    prep_template_id : int or str castable to int
        Prep template to attach the artifact to
    artifact_id : int or str castable to int, optional
        The id of the imported artifact

    Returns
    -------
    dict of objects
        A dictionary containing the new artifact ID
        {'status': status, 'message': message, 'artifact': id}
    """
    prep = PrepTemplate(int(prep_template_id))
    study_id = prep.study_id

    # First check if the user has access to the study
    access_error = check_access(study_id, user_id)
    if access_error:
        return access_error

    if artifact_id:
        # if the artifact id has been provided, import the artifact
        try:
            artifact = Artifact.copy(Artifact(artifact_id), prep)
        except Exception as e:
            # We should hit this exception rarely (that's why it is an
            # exception) since at this point we have done multiple checks.
            # However, it can occur in weird cases, so better let the GUI
            # know that this failed
            return {'status': 'error',
                    'message': "Error creating artifact: %s" % str(e)}
    else:
        uploads_path = get_mountpoint('uploads')[0][1]
        path_builder = partial(join, uploads_path, str(study_id))
        cleaned_filepaths = []
        for ftype, file_list in viewitems(filepaths):
            # JavaScript sends us this list as a comma-separated list
            for fp in file_list.split(','):
                # JavaScript will send this value as an empty string if the
                # list of files was empty. In such case, the split will
                # generate a single element containing the empty string.
                # Check for that case here and, if fp is not the empty
                # string, proceed to check if the file exists
                if fp:
                    # Check if the filepath being passed exists for study
                    full_fp = path_builder(fp)
                    exists = check_fp(study_id, full_fp)
                    if exists['status'] != 'success':
                        return {'status': 'error',
                                'message': 'File does not exist: %s' % fp}
                    cleaned_filepaths.append((full_fp, ftype))

        # This should never happen, but it doesn't hurt to have an
        # explicit check, in case there is something odd with the JS
        if not cleaned_filepaths:
            return {'status': 'error',
                    'message': "Can't create artifact, no files provided."}

        try:
            artifact = Artifact.create(cleaned_filepaths, artifact_type,
                                       name=name, prep_template=prep)
        except Exception as e:
            # We should hit this exception rarely (that's why it is an
            # exception) since at this point we have done multiple checks.
            # However, it can occur in weird cases, so better let the GUI
            # know that this failed
            return {'status': 'error',
                    'message': "Error creating artifact: %s" % str(e)}

    return {'status': 'success',
            'message': '',
            'artifact': artifact.id}
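# A minimal sketch of calling artifact_post_req above; the user email, file
# names, and prep template ID are hypothetical. The filepaths dict maps a
# filepath type to a comma-separated string of upload file names, matching
# the JS-oriented parsing in the body.
resp = artifact_post_req(
    'user@example.com',
    {'raw_forward_seqs': 'seqs_R1.fastq.gz',
     'raw_barcodes': 'barcodes.fastq.gz'},
    'FASTQ', 'new artifact', 1)
if resp['status'] == 'success':
    print('created artifact %d' % resp['artifact'])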
def test_download_study(self):
    tmp_dir = mkdtemp()
    self._clean_up_files.append(tmp_dir)

    biom_fp = join(tmp_dir, 'otu_table.biom')
    smr_dir = join(tmp_dir, 'sortmerna_picked_otus')
    log_dir = join(smr_dir, 'seqs_otus.log')
    tgz = join(tmp_dir, 'sortmerna_picked_otus.tgz')

    with biom_open(biom_fp, 'w') as f:
        et.to_hdf5(f, "test")
    makedirs(smr_dir)
    with open(log_dir, 'w') as f:
        f.write('\n')
    with open(tgz, 'w') as f:
        f.write('\n')

    files_biom = [(biom_fp, 'biom'), (smr_dir, 'directory'), (tgz, 'tgz')]

    params = Parameters.from_default_params(
        next(Command(3).default_parameter_sets), {'input_data': 1})
    a = Artifact.create(files_biom, "BIOM", parents=[Artifact(2)],
                        processing_parameters=params)
    for x in a.filepaths:
        self._clean_up_files.append(x['fp'])

    response = self.get('/download_study_bioms/1')
    self.assertEqual(response.code, 200)
    exp = ('- 1256812 /protected/processed_data/'
           '1_study_1001_closed_reference_otu_table.biom processed_data/'
           '1_study_1001_closed_reference_otu_table.biom\n'
           '- [0-9]* /protected/templates/1_prep_1_[0-9]*-[0-9]*.txt '
           'mapping_files/4_mapping_file.txt\n'
           '- 1256812 /protected/processed_data/'
           '1_study_1001_closed_reference_otu_table.biom processed_data/'
           '1_study_1001_closed_reference_otu_table.biom\n'
           '- [0-9]* /protected/templates/1_prep_1_[0-9]*-[0-9]*.txt '
           'mapping_files/5_mapping_file.txt\n'
           '- 1256812 /protected/processed_data/1_study_1001_'
           'closed_reference_otu_table_Silva.biom processed_data/'
           '1_study_1001_closed_reference_otu_table_Silva.biom\n'
           '- [0-9]* /protected/templates/1_prep_1_[0-9]*-[0-9]*.txt '
           'mapping_files/6_mapping_file.txt\n'
           '- 1093210 /protected/BIOM/7/biom_table.biom '
           'BIOM/7/biom_table.biom\n'
           '- [0-9]* /protected/templates/1_prep_2_[0-9]*-[0-9]*.txt '
           'mapping_files/7_mapping_file.txt\n'
           '- [0-9]* /protected/BIOM/{0}/otu_table.biom '
           'BIOM/{0}/otu_table.biom\n'
           '- 1 /protected/BIOM/{0}/sortmerna_picked_otus/seqs_otus.log '
           'BIOM/{0}/sortmerna_picked_otus/seqs_otus.log\n'
           '- [0-9]* /protected/templates/1_prep_1_[0-9]*-[0-9]*.txt '
           'mapping_files/{0}_mapping_file.txt\n'.format(a.id))
    self.assertRegex(response.body.decode('ascii'), exp)

    response = self.get('/download_study_bioms/200')
    self.assertEqual(response.code, 405)

    # changing user so we can test the failures
    BaseHandler.get_current_user = Mock(
        return_value=User("*****@*****.**"))
    response = self.get('/download_study_bioms/1')
    self.assertEqual(response.code, 405)

    a.visibility = 'public'
    response = self.get('/download_study_bioms/1')
    # restore visibility
    a.visibility = 'private'
    self.assertEqual(response.code, 200)
    # we should have the same files as the previous test, except the
    # artifact and mapping file 7 (positions 6 and 7); thus removing
    # index 6 twice
    exp = exp.split('\n')
    exp.pop(6)
    exp.pop(6)
    exp = '\n'.join(exp)
    self.assertRegex(response.body.decode('ascii'), exp)
def generate_new_study_with_preprocessed_data(self):
    """Creates a new study up to the processed data for testing"""
    # ignoring warnings generated when adding templates
    simplefilter("ignore")
    info = {
        "timeseries_type_id": 1,
        "metadata_complete": True,
        "mixs_compliant": True,
        "number_samples_collected": 3,
        "number_samples_promised": 3,
        "study_alias": "Test EBI",
        "study_description": "Study for testing EBI",
        "study_abstract": "Study for testing EBI",
        "emp_person_id": StudyPerson(2),
        "principal_investigator_id": StudyPerson(3),
        "lab_person_id": StudyPerson(1)
    }
    study = Study.create(User('*****@*****.**'), "Test EBI study", [1],
                         info)
    metadata_dict = {
        'Sample1': {'collection_timestamp': datetime(2015, 6, 1, 7, 0, 0),
                    'physical_specimen_location': 'location1',
                    'taxon_id': 9606,
                    'scientific_name': 'homo sapiens',
                    'Description': 'Test Sample 1'},
        'Sample2': {'collection_timestamp': datetime(2015, 6, 2, 7, 0, 0),
                    'physical_specimen_location': 'location1',
                    'taxon_id': 9606,
                    'scientific_name': 'homo sapiens',
                    'Description': 'Test Sample 2'},
        'Sample3': {'collection_timestamp': datetime(2015, 6, 3, 7, 0, 0),
                    'physical_specimen_location': 'location1',
                    'taxon_id': 9606,
                    'scientific_name': 'homo sapiens',
                    'Description': 'Test Sample 3'}
    }
    metadata = pd.DataFrame.from_dict(metadata_dict, orient='index',
                                      dtype=str)
    SampleTemplate.create(metadata, study)
    metadata_dict = {
        'Sample1': {'primer': 'GTGCCAGCMGCCGCGGTAA',
                    'barcode': 'CGTAGAGCTCTC',
                    'center_name': 'KnightLab',
                    'platform': 'ILLUMINA',
                    'instrument_model': 'Illumina MiSeq',
                    'library_construction_protocol': 'Protocol ABC',
                    'experiment_design_description': "Random value 1"},
        'Sample2': {'primer': 'GTGCCAGCMGCCGCGGTAA',
                    'barcode': 'CGTAGAGCTCTA',
                    'center_name': 'KnightLab',
                    'platform': 'ILLUMINA',
                    'instrument_model': 'Illumina MiSeq',
                    'library_construction_protocol': 'Protocol ABC',
                    'experiment_design_description': "Random value 2"},
        'Sample3': {'primer': 'GTGCCAGCMGCCGCGGTAA',
                    'barcode': 'CGTAGAGCTCTT',
                    'center_name': 'KnightLab',
                    'platform': 'ILLUMINA',
                    'instrument_model': 'Illumina MiSeq',
                    'library_construction_protocol': 'Protocol ABC',
                    'experiment_design_description': "Random value 3"},
    }
    metadata = pd.DataFrame.from_dict(metadata_dict, orient='index',
                                      dtype=str)
    pt = PrepTemplate.create(metadata, study, "16S", 'Metagenomics')
    fna_fp = join(self.temp_dir, 'seqs.fna')
    demux_fp = join(self.temp_dir, 'demux.seqs')
    with open(fna_fp, 'w') as f:
        f.write(FASTA_EXAMPLE_2.format(study.id))
    with File(demux_fp, 'w') as f:
        to_hdf5(fna_fp, f)

    ppd = Artifact.create(
        [(demux_fp, 6)], "Demultiplexed", prep_template=pt)
    return ppd
def test_download_study(self):
    tmp_dir = mkdtemp()
    self._clean_up_files.append(tmp_dir)

    biom_fp = join(tmp_dir, 'otu_table.biom')
    smr_dir = join(tmp_dir, 'sortmerna_picked_otus')
    log_dir = join(smr_dir, 'seqs_otus.log')
    tgz = join(tmp_dir, 'sortmerna_picked_otus.tgz')

    with biom_open(biom_fp, 'w') as f:
        et.to_hdf5(f, "test")
    makedirs(smr_dir)
    with open(log_dir, 'w') as f:
        f.write('\n')
    with open(tgz, 'w') as f:
        f.write('\n')

    files_biom = [(biom_fp, 'biom'), (smr_dir, 'directory'), (tgz, 'tgz')]

    params = Parameters.from_default_params(
        Command(3).default_parameter_sets.next(), {'input_data': 1})
    a = Artifact.create(files_biom, "BIOM", parents=[Artifact(2)],
                        processing_parameters=params)
    for _, fp, _ in a.filepaths:
        self._clean_up_files.append(fp)

    response = self.get('/download_study_bioms/1')
    self.assertEqual(response.code, 200)
    exp = (
        '- 1256812 /protected/processed_data/1_study_1001_closed_'
        'reference_otu_table.biom processed_data/1_study_1001_closed_'
        'reference_otu_table.biom\n'
        '- 36615 /protected/templates/1_prep_1_qiime_[0-9]*-'
        '[0-9]*.txt mapping_files/4_mapping_file.txt\n'
        '- 1256812 /protected/processed_data/'
        '1_study_1001_closed_reference_otu_table.biom processed_data/'
        '1_study_1001_closed_reference_otu_table.biom\n'
        '- 36615 /protected/templates/1_prep_1_qiime_[0-9]*-'
        '[0-9]*.txt mapping_files/5_mapping_file.txt\n'
        '- 1256812 /protected/processed_data/'
        '1_study_1001_closed_reference_otu_table_Silva.biom processed_data'
        '/1_study_1001_closed_reference_otu_table_Silva.biom\n'
        '- 36615 /protected/templates/1_prep_1_qiime_[0-9]*-'
        '[0-9]*.txt mapping_files/6_mapping_file.txt\n'
        '- 36615 /protected/templates/1_prep_2_qiime_[0-9]*-'
        '[0-9]*.txt mapping_files/7_mapping_file.txt\n'
        '- 39752 /protected/BIOM/{0}/otu_table.biom '
        'BIOM/{0}/otu_table.biom\n'
        '- 1 /protected/BIOM/{0}/sortmerna_picked_otus/seqs_otus.log '
        'BIOM/{0}/sortmerna_picked_otus/seqs_otus.log\n'
        '- 36615 /protected/templates/1_prep_1_qiime_[0-9]*-[0-9]*.'
        'txt mapping_files/{0}_mapping_file.txt\n'.format(a.id))
    self.assertRegexpMatches(response.body, exp)

    response = self.get('/download_study_bioms/200')
    self.assertEqual(response.code, 405)

    # changing user so we can test the failures
    BaseHandler.get_current_user = Mock(
        return_value=User("*****@*****.**"))
    response = self.get('/download_study_bioms/1')
    self.assertEqual(response.code, 405)

    a.visibility = 'public'
    response = self.get('/download_study_bioms/1')
    self.assertEqual(response.code, 200)
    exp = ('- 39752 /protected/BIOM/{0}/otu_table.biom '
           'BIOM/{0}/otu_table.biom\n'
           '- 1 /protected/BIOM/{0}/sortmerna_picked_otus/seqs_otus.log '
           'BIOM/{0}/sortmerna_picked_otus/seqs_otus.log\n'
           '- 36615 /protected/templates/1_prep_1_qiime_[0-9]*-[0-9]*.'
           'txt mapping_files/{0}_mapping_file.txt\n'.format(a.id))
    self.assertRegexpMatches(response.body, exp)
def create_raw_data(filetype, prep_template, filepaths):
    """Creates a new raw data

    Needs to be dispatchable because it moves large files
    """
    Artifact.create(filepaths, filetype, prep_template=prep_template)
def test_download_study(self):
    tmp_dir = mkdtemp()
    self._clean_up_files.append(tmp_dir)

    biom_fp = join(tmp_dir, 'otu_table.biom')
    smr_dir = join(tmp_dir, 'sortmerna_picked_otus')
    log_dir = join(smr_dir, 'seqs_otus.log')
    tgz = join(tmp_dir, 'sortmerna_picked_otus.tgz')

    with biom_open(biom_fp, 'w') as f:
        et.to_hdf5(f, "test")
    makedirs(smr_dir)
    with open(log_dir, 'w') as f:
        f.write('\n')
    with open(tgz, 'w') as f:
        f.write('\n')

    files_biom = [(biom_fp, 'biom'), (smr_dir, 'directory'), (tgz, 'tgz')]

    params = Parameters.from_default_params(
        Command(3).default_parameter_sets.next(), {'input_data': 1})
    a = Artifact.create(files_biom, "BIOM", parents=[Artifact(2)],
                        processing_parameters=params)
    for _, fp, _ in a.filepaths:
        self._clean_up_files.append(fp)

    response = self.get('/download_study_bioms/1')
    self.assertEqual(response.code, 200)
    exp = (
        '- 1256812 /protected/processed_data/1_study_1001_closed_'
        'reference_otu_table.biom processed_data/1_study_1001_closed_'
        'reference_otu_table.biom\n'
        '- 36615 /protected/templates/1_prep_1_qiime_[0-9]*-'
        '[0-9]*.txt mapping_files/4_mapping_file.txt\n'
        '- 1256812 /protected/processed_data/'
        '1_study_1001_closed_reference_otu_table.biom processed_data/'
        '1_study_1001_closed_reference_otu_table.biom\n'
        '- 36615 /protected/templates/1_prep_1_qiime_[0-9]*-'
        '[0-9]*.txt mapping_files/5_mapping_file.txt\n'
        '- 1256812 /protected/processed_data/'
        '1_study_1001_closed_reference_otu_table_Silva.biom processed_data'
        '/1_study_1001_closed_reference_otu_table_Silva.biom\n'
        '- 36615 /protected/templates/1_prep_1_qiime_[0-9]*-'
        '[0-9]*.txt mapping_files/6_mapping_file.txt\n'
        '- 36615 /protected/templates/1_prep_2_qiime_[0-9]*-'
        '[0-9]*.txt mapping_files/7_mapping_file.txt\n'
        '- [0-9]* /protected/BIOM/{0}/otu_table.biom '
        'BIOM/{0}/otu_table.biom\n'
        '- 1 /protected/BIOM/{0}/sortmerna_picked_otus/seqs_otus.log '
        'BIOM/{0}/sortmerna_picked_otus/seqs_otus.log\n'
        '- 36615 /protected/templates/1_prep_1_qiime_[0-9]*-[0-9]*.'
        'txt mapping_files/{0}_mapping_file.txt\n'.format(a.id))
    self.assertRegexpMatches(response.body, exp)

    response = self.get('/download_study_bioms/200')
    self.assertEqual(response.code, 405)

    # changing user so we can test the failures
    BaseHandler.get_current_user = Mock(
        return_value=User("*****@*****.**"))
    response = self.get('/download_study_bioms/1')
    self.assertEqual(response.code, 405)

    a.visibility = 'public'
    response = self.get('/download_study_bioms/1')
    self.assertEqual(response.code, 200)
    exp = (
        '- [0-9]* /protected/BIOM/{0}/otu_table.biom '
        'BIOM/{0}/otu_table.biom\n'
        '- 1 /protected/BIOM/{0}/sortmerna_picked_otus/seqs_otus.log '
        'BIOM/{0}/sortmerna_picked_otus/seqs_otus.log\n'
        '- 36615 /protected/templates/1_prep_1_qiime_[0-9]*-[0-9]*.'
        'txt mapping_files/{0}_mapping_file.txt\n'.format(a.id))
    self.assertRegexpMatches(response.body, exp)