def create_sample_template(fp, study, is_mapping_file, data_type=None):
    """Creates a sample template

    Parameters
    ----------
    fp : str
        The file path to the template file
    study : qiita_db.study.Study
        The study to add the sample template to
    is_mapping_file : bool
        Whether `fp` contains a mapping file or a sample template
    data_type : str, optional
        If `is_mapping_file` is True, the data type of the prep template to
        be created

    Returns
    -------
    dict of {str: str}
        A dict of the form {'status': str, 'message': str}. `status` is
        'success' when nothing was reported, 'warning' when warnings were
        recorded during the creation (`message` holds them, one per line)
        and 'danger' when an exception was raised (`message` holds the
        text of the exception)

    Notes
    -----
    The file in `fp` is removed only if the templates are created without
    raising an exception.
    """
    # The imports need to be in here because this code is executed in
    # the ipython workers
    import warnings
    from os import remove
    from qiita_db.metadata_template.sample_template import SampleTemplate
    from qiita_db.metadata_template.util import load_template_to_dataframe
    from qiita_ware.metadata_pipeline import (
        create_templates_from_qiime_mapping_file)

    status = 'success'
    msg = ''
    try:
        with warnings.catch_warnings(record=True) as warns:
            if is_mapping_file:
                create_templates_from_qiime_mapping_file(fp, study,
                                                         data_type)
            else:
                SampleTemplate.create(load_template_to_dataframe(fp),
                                      study)
            remove(fp)

            # join all the warning messages into one. Note that this
            # info will be ignored if an exception is raised
            if warns:
                # Drop repeated messages but keep them in the order in
                # which they were emitted: going through a set() made the
                # order of the lines shown to the user arbitrary
                unique_msgs = []
                for w in warns:
                    text = str(w.message)
                    if text not in unique_msgs:
                        unique_msgs.append(text)
                msg = '\n'.join(unique_msgs)
                status = 'warning'
    except Exception as e:
        # Some error occurred while processing the sample template
        # Show the error to the user so they can fix the template
        status = 'danger'
        msg = str(e)

    return {'status': status, 'message': msg}
def create_sample_template(fp, study, is_mapping_file, data_type=None):
    """Creates a sample template

    Parameters
    ----------
    fp : str
        The file path to the template file
    study : qiita_db.study.Study
        The study to add the sample template to
    is_mapping_file : bool
        Whether `fp` contains a mapping file or a sample template
    data_type : str, optional
        If `is_mapping_file` is True, the data type of the prep template to
        be created

    Returns
    -------
    dict of {str: str}
        A dict of the form {'status': str, 'message': str}. The status is
        one of 'success', 'warning' (the message lists the distinct
        warnings recorded, one per line, in emission order) or 'danger'
        (the message is the text of the exception that was raised)

    Notes
    -----
    `fp` is deleted from disk only when the creation did not raise.
    """
    # The imports need to be in here because this code is executed in
    # the ipython workers
    import warnings
    from os import remove
    from qiita_db.metadata_template.sample_template import SampleTemplate
    from qiita_db.metadata_template.util import load_template_to_dataframe
    from qiita_ware.metadata_pipeline import (
        create_templates_from_qiime_mapping_file)

    status = 'success'
    msg = ''
    try:
        with warnings.catch_warnings(record=True) as warns:
            if is_mapping_file:
                create_templates_from_qiime_mapping_file(fp, study,
                                                         data_type)
            else:
                SampleTemplate.create(load_template_to_dataframe(fp),
                                      study)
            remove(fp)

            # join all the warning messages into one. Note that this
            # info will be ignored if an exception is raised
            if warns:
                # De-duplicate while preserving first-seen order; a plain
                # set() would emit the lines in an arbitrary order
                seen = set()
                ordered = []
                for warn in warns:
                    text = str(warn.message)
                    if text in seen:
                        continue
                    seen.add(text)
                    ordered.append(text)
                msg = '\n'.join(ordered)
                status = 'warning'
    except Exception as e:
        # Some error occurred while processing the sample template
        # Show the error to the user so they can fix the template
        status = 'danger'
        msg = str(e)

    return {'status': status, 'message': msg}
def test_delete_sample_template(self):
    """Run 'delete_sample_template' jobs for a failing and a passing study"""
    # Error case
    failing_job = self._create_job('delete_sample_template', {'study': 1})
    private_task(failing_job.id)
    self.assertEqual(failing_job.status, 'error')
    expected_error = ("Sample template cannot be erased because there are "
                      "prep templates associated")
    self.assertIn(expected_error, failing_job.log.msg)

    # Success case: a new study that only has sample information
    study_info = {
        "timeseries_type_id": '1',
        "metadata_complete": 'true',
        "mixs_compliant": 'true',
        "number_samples_collected": 25,
        "number_samples_promised": 28,
        "study_alias": "TDST",
        "study_description": "Test delete sample template",
        "study_abstract": "Test delete sample template",
        "principal_investigator_id": StudyPerson(1)
    }
    study = Study.create(User('*****@*****.**'),
                         "Delete Sample Template test", study_info)

    sample_row = {
        'physical_specimen_location': 'location1',
        'physical_specimen_remaining': 'true',
        'dna_extracted': 'true',
        'sample_type': 'type1',
        'collection_timestamp': '2014-05-29 12:24:15',
        'host_subject_id': 'NotIdentified',
        'Description': 'Test Sample 1',
        'latitude': '42.42',
        'longitude': '41.41',
        'taxon_id': '9606',
        'scientific_name': 'h**o sapiens'
    }
    metadata = pd.DataFrame.from_dict({'Sample1': sample_row},
                                      orient='index', dtype=str)
    SampleTemplate.create(metadata, study)

    passing_job = self._create_job('delete_sample_template',
                                   {'study': study.id})
    private_task(passing_job.id)
    self.assertEqual(passing_job.status, 'success')
    self.assertFalse(SampleTemplate.exists(study.id))
def test_get_lat_longs_EMP_portal(self):
    """get_lat_longs under the EMP portal reports the new study's sample"""
    study_info = {
        'timeseries_type_id': 1,
        'lab_person_id': None,
        'principal_investigator_id': 3,
        'metadata_complete': False,
        'mixs_compliant': True,
        'study_description': 'desc',
        'study_alias': 'alias',
        'study_abstract': 'abstract'}
    study = Study.create(User('*****@*****.**'), 'test_study_1',
                         efo=[1], info=study_info)
    Portal('EMP').add_studies([study.id])

    # Single sample whose coordinates are the expected observation
    sample_row = {
        'physical_specimen_location': 'location1',
        'physical_specimen_remaining': True,
        'dna_extracted': True,
        'sample_type': 'type1',
        'collection_timestamp': datetime(2014, 5, 29, 12, 24, 51),
        'host_subject_id': 'NotIdentified',
        'Description': 'Test Sample 4',
        'str_column': 'Value for sample 4',
        'int_column': 4,
        'latitude': 42.42,
        'longitude': 41.41,
        'taxon_id': 9606,
        'scientific_name': 'h**o sapiens'}
    md_ext = pd.DataFrame.from_dict({'my.sample': sample_row},
                                    orient='index')
    SampleTemplate.create(md_ext, study)

    # Switch the active portal before querying
    qiita_config.portal = 'EMP'
    observed = get_lat_longs()
    expected = [[42.42, 41.41]]
    self.assertItemsEqual(observed, expected)
def test_delete_sample_template(self):
    """Check the error and success outcomes of 'delete_sample_template'"""
    command = 'delete_sample_template'

    # Error case
    job = self._create_job(command, {'study': 1})
    private_task(job.id)
    self.assertEqual(job.status, 'error')
    self.assertIn("Sample template cannot be erased because there are "
                  "prep templates associated", job.log.msg)

    # Success case
    info = {}
    info["timeseries_type_id"] = '1'
    info["metadata_complete"] = 'true'
    info["mixs_compliant"] = 'true'
    info["number_samples_collected"] = 25
    info["number_samples_promised"] = 28
    info["study_alias"] = "TDST"
    info["study_description"] = "Test delete sample template"
    info["study_abstract"] = "Test delete sample template"
    info["principal_investigator_id"] = StudyPerson(1)
    new_study = Study.create(User('*****@*****.**'),
                             "Delete Sample Template test", info)

    samples = {
        'Sample1': {
            'physical_specimen_location': 'location1',
            'physical_specimen_remaining': 'true',
            'dna_extracted': 'true',
            'sample_type': 'type1',
            'collection_timestamp': '2014-05-29 12:24:15',
            'host_subject_id': 'NotIdentified',
            'Description': 'Test Sample 1',
            'latitude': '42.42',
            'longitude': '41.41',
            'taxon_id': '9606',
            'scientific_name': 'h**o sapiens',
        },
    }
    SampleTemplate.create(
        pd.DataFrame.from_dict(samples, orient='index', dtype=str),
        new_study)

    job = self._create_job(command, {'study': new_study.id})
    private_task(job.id)
    self.assertEqual(job.status, 'success')
    self.assertFalse(SampleTemplate.exists(new_study.id))
def test_sample_template_handler_patch_request(self):
    """Cover the rejected and accepted PATCH requests (remove / replace)"""
    user = User('*****@*****.**')
    patch = sample_template_handler_patch_request

    # Test user doesn't have access
    with self.assertRaisesRegexp(HTTPError,
                                 'User does not have access to study'):
        patch(User('*****@*****.**'), "remove",
              "/1/columns/season_environment/")

    # Test study doesn't exist
    with self.assertRaisesRegexp(HTTPError, 'Study does not exist'):
        patch(user, "remove", "/10000/columns/season_environment/")

    # Test sample template doesn't exist
    new_study = self._create_study('Patching test')
    no_info_regex = ("Study %s doesn't have sample information"
                     % new_study.id)
    with self.assertRaisesRegexp(HTTPError, no_info_regex):
        patch(user, "remove",
              "/%s/columns/season_environment/" % new_study.id)

    # Test wrong operation value
    bad_op_regex = ('Operation add not supported. Current supported '
                    'operations: remove.')
    with self.assertRaisesRegexp(HTTPError, bad_op_regex):
        patch(user, 'add', '/1/columns/season_environment')

    # Test wrong path parameter < 2
    with self.assertRaisesRegexp(HTTPError, 'Incorrect path parameter'):
        patch(user, 'ignored', '1')

    # TESTS FOR OPERATION: remove
    # Test wrong path parameter
    with self.assertRaisesRegexp(HTTPError, 'Incorrect path parameter'):
        patch(user, 'remove', '/1/season_environment/')

    # Add sample information to the new study so we can delete one column
    # without affecting the other tests
    rows = {'Sample1': {'col1': 'val1', 'col2': 'val2'}}
    md = pd.DataFrame.from_dict(rows, orient='index', dtype=str)
    st = SampleTemplate.create(md, new_study)

    # Test success
    obs = patch(user, "remove", "/%s/columns/col2/" % new_study.id)
    self.assertEqual(obs.keys(), ['job'])
    job_info = r_client.get('sample_template_%s' % new_study.id)
    self.assertIsNotNone(job_info)

    # Wait until the job is done
    wait_for_processing_job(loads(job_info)['job_id'])
    self.assertNotIn('col2', st.categories())

    # TESTS FOR OPERATION: replace
    # Test incorrect path parameter with replace
    with self.assertRaisesRegexp(HTTPError, 'Incorrect path parameter'):
        patch(user, "replace", "/1/")

    # Test attribute not found
    with self.assertRaisesRegexp(HTTPError, 'Attribute name not found'):
        patch(user, "replace", "/1/name")

    # Test missing value
    missing_value_regex = ('Value is required when updating sample '
                           'information')
    with self.assertRaisesRegexp(HTTPError, missing_value_regex):
        patch(user, "replace", "/1/data")

    # Test file doesn't exist
    with self.assertRaisesRegexp(HTTPError, 'Filepath not found'):
        patch(user, "replace", "/1/data", req_value='DoesNotExist')

    # Test success
    obs = patch(user, "replace", "/1/data", req_value='uploaded_file.txt')
    self.assertEqual(obs.keys(), ['job'])
    job_info = r_client.get('sample_template_1')
    self.assertIsNotNone(job_info)

    # Wait until the job is done
    wait_for_processing_job(loads(job_info)['job_id'])
def generate_new_study_with_preprocessed_data(self):
    """Build a new study with templates and a demultiplexed artifact

    Returns
    -------
    object
        The value returned by ``Artifact.create`` for the "Demultiplexed"
        artifact attached to the newly created prep template
    """
    study_info = {
        "timeseries_type_id": 1,
        "metadata_complete": True,
        "mixs_compliant": True,
        "number_samples_collected": 3,
        "number_samples_promised": 3,
        "study_alias": "Test EBI",
        "study_description": "Study for testing EBI",
        "study_abstract": "Study for testing EBI",
        "emp_person_id": StudyPerson(2),
        "principal_investigator_id": StudyPerson(3),
        "lab_person_id": StudyPerson(1)
    }
    study = Study.create(User('*****@*****.**'), "Test EBI study",
                         study_info)

    # Sample information: SampleN was collected on June N, 2015
    sample_rows = {}
    for n in (1, 2, 3):
        sample_rows['Sample%d' % n] = {
            'collection_timestamp': datetime(2015, 6, n, 7, 0, 0),
            'physical_specimen_location': 'location1',
            'taxon_id': 9606,
            'scientific_name': 'h**o sapiens',
            'Description': 'Test Sample %d' % n
        }
    sample_md = pd.DataFrame.from_dict(sample_rows, orient='index',
                                       dtype=str)
    SampleTemplate.create(sample_md, study)

    # Prep information: the rows only differ in barcode and description
    barcodes = ('CGTAGAGCTCTC', 'CGTAGAGCTCTA', 'CGTAGAGCTCTT')
    prep_rows = {}
    for n, barcode in enumerate(barcodes, 1):
        prep_rows['Sample%d' % n] = {
            'primer': 'GTGCCAGCMGCCGCGGTAA',
            'barcode': barcode,
            'center_name': 'KnightLab',
            'platform': 'ILLUMINA',
            'instrument_model': 'Illumina MiSeq',
            'library_construction_protocol': 'Protocol ABC',
            'experiment_design_description': "Random value %d" % n
        }
    prep_md = pd.DataFrame.from_dict(prep_rows, orient='index', dtype=str)
    pt = PrepTemplate.create(prep_md, study, "16S", 'Metagenomics')

    # Write the sequences and convert them into the demux file
    fna_fp = join(self.temp_dir, 'seqs.fna')
    demux_fp = join(self.temp_dir, 'demux.seqs')
    with open(fna_fp, 'w') as fna_fh:
        fna_fh.write(FASTA_EXAMPLE_2.format(study.id))
    with File(demux_fp, 'w') as demux_fh:
        to_hdf5(fna_fp, demux_fh)

    return Artifact.create([(demux_fp, 6)], "Demultiplexed",
                           prep_template=pt)
def generate_new_study_with_preprocessed_data(self):
    """Create a study, both templates and a "Demultiplexed" artifact

    Warnings are globally set to be ignored before anything is created.

    Returns
    -------
    object
        The value returned by ``Artifact.create`` for the new artifact
    """
    # ignoring warnings generated when adding templates
    simplefilter("ignore")

    info = {
        "timeseries_type_id": 1,
        "metadata_complete": True,
        "mixs_compliant": True,
        "number_samples_collected": 3,
        "number_samples_promised": 3,
        "study_alias": "Test EBI",
        "study_description": "Study for testing EBI",
        "study_abstract": "Study for testing EBI",
        "emp_person_id": StudyPerson(2),
        "principal_investigator_id": StudyPerson(3),
        "lab_person_id": StudyPerson(1)
    }
    study = Study.create(User('*****@*****.**'), "Test EBI study", [1],
                         info)

    def _sample_row(day):
        # One sample-information row; `day` is both the sample number and
        # the day of June 2015 on which it was collected
        return {'collection_timestamp': datetime(2015, 6, day, 7, 0, 0),
                'physical_specimen_location': 'location1',
                'taxon_id': 9606,
                'scientific_name': 'h**o sapiens',
                'Description': 'Test Sample %d' % day}

    def _prep_row(number, barcode):
        # One prep-information row; only barcode/description change
        return {'primer': 'GTGCCAGCMGCCGCGGTAA',
                'barcode': barcode,
                'center_name': 'KnightLab',
                'platform': 'ILLUMINA',
                'instrument_model': 'Illumina MiSeq',
                'library_construction_protocol': 'Protocol ABC',
                'experiment_design_description':
                    "Random value %d" % number}

    metadata_dict = {
        'Sample1': _sample_row(1),
        'Sample2': _sample_row(2),
        'Sample3': _sample_row(3)
    }
    metadata = pd.DataFrame.from_dict(metadata_dict, orient='index',
                                      dtype=str)
    SampleTemplate.create(metadata, study)

    metadata_dict = {
        'Sample1': _prep_row(1, 'CGTAGAGCTCTC'),
        'Sample2': _prep_row(2, 'CGTAGAGCTCTA'),
        'Sample3': _prep_row(3, 'CGTAGAGCTCTT'),
    }
    metadata = pd.DataFrame.from_dict(metadata_dict, orient='index',
                                      dtype=str)
    pt = PrepTemplate.create(metadata, study, "16S", 'Metagenomics')

    fna_fp = join(self.temp_dir, 'seqs.fna')
    demux_fp = join(self.temp_dir, 'demux.seqs')
    with open(fna_fp, 'w') as out:
        out.write(FASTA_EXAMPLE_2.format(study.id))
    with File(demux_fp, 'w') as demux:
        to_hdf5(fna_fp, demux)

    ppd = Artifact.create(
        [(demux_fp, 6)], "Demultiplexed", prep_template=pt)

    return ppd
def test_delete_study(self):
    """Run 'delete_study' on a study that must fail and one that passes"""
    # as samples have been submitted to EBI, this will fail
    ebi_job = self._create_job('delete_study', {'study': 1})
    private_task(ebi_job.id)
    self.assertEqual(ebi_job.status, 'error')
    self.assertIn("Artifact 2 has been submitted to EBI", ebi_job.log.msg)
    # making sure the analysis, first thing to delete, still exists
    self.assertTrue(Analysis.exists(1))

    study_info = {
        "timeseries_type_id": '1',
        "metadata_complete": 'true',
        "mixs_compliant": 'true',
        "study_alias": "FCM",
        "study_description": "Microbiome of people who eat nothing but "
                             "fried chicken",
        "study_abstract": "Exploring how a high fat diet changes the "
                          "gut microbiome",
        "principal_investigator_id": StudyPerson(3),
        "lab_person_id": StudyPerson(1)
    }
    new_study = Study.create(User('*****@*****.**'),
                             "Fried Chicken Microbiome %s" % time(),
                             study_info)

    # adding tags
    new_study.update_tags(User('*****@*****.**'), ['my new tag!'])

    # creating a sample information file: three samples, same values
    sample_rows = {
        sample_name: {
            'physical_specimen_location': 'location1',
            'taxon_id': '9606',
            'scientific_name': 'h**o sapiens'
        }
        for sample_name in ('Sample1', 'Sample2', 'Sample3')
    }
    sample_md = pd.DataFrame.from_dict(sample_rows, orient='index')
    SampleTemplate.create(sample_md, new_study)

    # creating a preparation information file
    prep_rows = {
        'Sample1': {
            'center_name': 'ANL',
            'target_subfragment': 'V4',
            'center_project_name': 'Test Project'
        }
    }
    prep_md = pd.DataFrame.from_dict(prep_rows, orient='index', dtype=str)
    PrepTemplate.create(prep_md, new_study, '16S')

    delete_job = self._create_job('delete_study', {'study': new_study.id})
    private_task(delete_job.id)
    self.assertEqual(delete_job.status, 'success')

    # making sure the study doesn't exist
    with self.assertRaises(QiitaDBUnknownIDError):
        Study(new_study.id)
def test_sample_template_handler_patch_request(self):
    """Check every failure mode, then the 'remove' and 'replace' successes"""
    user = User('*****@*****.**')
    raises_http = self.assertRaisesRegexp

    # Test user doesn't have access
    with raises_http(HTTPError, 'User does not have access to study'):
        sample_template_handler_patch_request(
            User('*****@*****.**'), "remove",
            "/1/columns/season_environment/")

    # Test study doesn't exist
    with raises_http(HTTPError, 'Study does not exist'):
        sample_template_handler_patch_request(
            user, "remove", "/10000/columns/season_environment/")

    # Test sample template doesn't exist
    new_study = self._create_study('Patching test')
    with raises_http(
            HTTPError,
            "Study %s doesn't have sample information" % new_study.id):
        sample_template_handler_patch_request(
            user, "remove",
            "/%s/columns/season_environment/" % new_study.id)

    # Test wrong operation value
    with raises_http(
            HTTPError, 'Operation add not supported. Current supported '
                       'operations: remove.'):
        sample_template_handler_patch_request(
            user, 'add', '/1/columns/season_environment')

    # Test wrong path parameter < 2
    with raises_http(HTTPError, 'Incorrect path parameter'):
        sample_template_handler_patch_request(user, 'ignored', '1')

    # TESTS FOR OPERATION: remove
    # Test wrong path parameter
    with raises_http(HTTPError, 'Incorrect path parameter'):
        sample_template_handler_patch_request(
            user, 'remove', '/1/season_environment/')

    # Add sample information to the new study so we can delete one column
    # without affecting the other tests
    two_columns = pd.DataFrame.from_dict(
        {'Sample1': {'col1': 'val1', 'col2': 'val2'}},
        orient='index', dtype=str)
    st = SampleTemplate.create(two_columns, new_study)

    # Test success
    obs = sample_template_handler_patch_request(
        user, "remove", "/%s/columns/col2/" % new_study.id)
    self.assertEqual(obs.keys(), ['job'])
    redis_key = 'sample_template_%s' % new_study.id
    job_info = r_client.get(redis_key)
    self.assertIsNotNone(job_info)

    # Wait until the job is done
    job_id = loads(job_info)['job_id']
    wait_for_processing_job(job_id)
    self.assertNotIn('col2', st.categories())

    # TESTS FOR OPERATION: replace
    # Test incorrect path parameter with replace
    with raises_http(HTTPError, 'Incorrect path parameter'):
        sample_template_handler_patch_request(user, "replace", "/1/")

    # Test attribute not found
    with raises_http(HTTPError, 'Attribute name not found'):
        sample_template_handler_patch_request(user, "replace", "/1/name")

    # Test missing value
    with raises_http(
            HTTPError, 'Value is required when updating sample '
                       'information'):
        sample_template_handler_patch_request(user, "replace", "/1/data")

    # Test file doesn't exist
    with raises_http(HTTPError, 'Filepath not found'):
        sample_template_handler_patch_request(
            user, "replace", "/1/data", req_value='DoesNotExist')

    # Test success
    obs = sample_template_handler_patch_request(
        user, "replace", "/1/data", req_value='uploaded_file.txt')
    self.assertEqual(obs.keys(), ['job'])
    job_info = r_client.get('sample_template_1')
    self.assertIsNotNone(job_info)

    # Wait until the job is done
    job_id = loads(job_info)['job_id']
    wait_for_processing_job(job_id)
def process_sample_template(self, study, user, callback):
    """Process a sample template from the POST method

    Parameters
    ----------
    study : Study
        The current study object
    user : User
        The current user object (not used by this method)
    callback : function
        The callback function to call with the results once the processing
        is done. It receives a single 5-tuple
        ``(msg, msg_level, None, None, None)`` where `msg_level` is one of
        "success", "warning" or "danger"

    Raises
    ------
    HTTPError
        If the sample template file does not exists, or if the requested
        file name points outside of the study's uploads folder
    """
    # Imported locally: only needed to validate the requested path
    from os.path import abspath

    # If we are on this function, the arguments "sample_template" and
    # "data_type" must be defined. If not, let tornado raise its error
    sample_template = self.get_argument('sample_template')
    data_type = self.get_argument('data_type')

    # Get the uploads folder
    _, base_fp = get_mountpoint("uploads")[0]
    # Get the path of the sample template in the uploads folder
    study_dir = join(base_fp, str(study.id))
    fp_rsp = join(study_dir, sample_template)

    # `sample_template` comes straight from the request: refuse anything
    # that resolves outside of the study's upload folder ("../", absolute
    # paths), since the file is parsed and then removed below.
    # join(<dir>, '') appends the path separator so that "<dir>2" does not
    # pass as being inside "<dir>"
    in_study_dir = abspath(fp_rsp).startswith(join(abspath(study_dir), ''))

    if not in_study_dir or not exists(fp_rsp):
        # The file does not exist, fail nicely
        raise HTTPError(404, "This file doesn't exist: %s" % fp_rsp)

    # Define here the message and message level in case of success
    msg = "The sample template '%s' has been added" % sample_template
    msg_level = "success"
    is_mapping_file = looks_like_qiime_mapping_file(fp_rsp)

    try:
        if is_mapping_file and not data_type:
            raise ValueError("Please, choose a data type if uploading a "
                             "QIIME mapping file")

        with warnings.catch_warnings(record=True) as warns:
            if is_mapping_file:
                create_templates_from_qiime_mapping_file(fp_rsp, study,
                                                         int(data_type))
            else:
                SampleTemplate.create(load_template_to_dataframe(fp_rsp),
                                      study)
            remove(fp_rsp)

            # join all the warning messages into one. Note that this
            # info will be ignored if an exception is raised
            if warns:
                msg = '; '.join([convert_text_html(str(w.message))
                                 for w in warns])
                msg_level = 'warning'

    except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
            QiitaDBDuplicateError, IOError, ValueError, KeyError,
            CParserError, QiitaDBDuplicateHeaderError,
            QiitaDBError, QiitaWareError) as e:
        # Some error occurred while processing the sample template
        # Show the error to the user so they can fix the template
        error_msg = ('parsing the QIIME mapping file'
                     if is_mapping_file
                     else 'parsing the sample template')
        msg = html_error_message % (error_msg, basename(fp_rsp),
                                    str(e))
        msg = convert_text_html(msg)
        msg_level = "danger"

    callback((msg, msg_level, None, None, None))
def create_templates_from_qiime_mapping_file(fp, study, data_type):
    """Creates a sample template and a prep template from qiime mapping file

    Parameters
    ----------
    fp : str or file-like object
        Path to the QIIME mapping file
    study : Study
        The study to which the sample template belongs to
    data_type : str or int
        The data_type of the prep_template

    Returns
    -------
    (SampleTemplate, PrepTemplate)
        The templates created from the QIIME mapping file

    Raises
    ------
    QiitaWareError
        If the mapping file lacks any of the BarcodeSequence,
        LinkerPrimerSequence or Description columns
    """
    qiime_map = load_template_to_dataframe(fp, index='#SampleID')

    # There are a few columns in the QIIME mapping file that are special and
    # we know how to deal with them
    rename_cols = {
        'BarcodeSequence': 'barcode',
        'LinkerPrimerSequence': 'primer',
        'Description': 'description',
    }

    # ReverseLinkerPrimer is optional: only rename it when present
    if 'ReverseLinkerPrimer' in qiime_map:
        rename_cols['ReverseLinkerPrimer'] = 'reverselinkerprimer'

    missing = set(rename_cols).difference(qiime_map.columns)
    if missing:
        # sorted() so the error text does not depend on set ordering
        raise QiitaWareError(
            "Error generating the templates from the QIIME mapping file. "
            "Missing QIIME mapping file columns: %s"
            % ', '.join(sorted(missing)))

    qiime_map.rename(columns=rename_cols, inplace=True)

    # Fix the casing in the columns that we control
    qiime_map.columns = [
        c.lower() if c.lower() in CONTROLLED_COLS else c
        for c in qiime_map.columns
    ]

    # Figure out which columns belong to the prep template
    def _col_iterator(restriction_set):
        for restriction in restriction_set.values():
            for cols in restriction.columns.keys():
                yield cols

    pt_cols = set(_col_iterator(PREP_TEMPLATE_COLUMNS))

    data_type_str = (convert_from_id(data_type, "data_type")
                     if isinstance(data_type, int) else data_type)

    if data_type_str in TARGET_GENE_DATA_TYPES:
        pt_cols.update(_col_iterator(PREP_TEMPLATE_COLUMNS_TARGET_GENE))
        pt_cols.add('reverselinkerprimer')

    # Split the columns with a boolean mask instead of indexing `.loc` with
    # Python sets: newer pandas releases reject set indexers, and a set
    # yields the columns in an arbitrary order. The mask keeps every column
    # exactly once, in the order in which it appears in the mapping file
    in_prep = qiime_map.columns.isin(pt_cols)
    st_md = qiime_map.loc[:, ~in_prep]
    pt_md = qiime_map.loc[:, in_prep]

    return (SampleTemplate.create(st_md, study),
            PrepTemplate.create(pt_md, study, data_type))
def create_templates_from_qiime_mapping_file(fp, study, data_type):
    """Creates a sample template and a prep template from qiime mapping file

    Parameters
    ----------
    fp : str or file-like object
        Path to the QIIME mapping file
    study : Study
        The study to which the sample template belongs to
    data_type : str or int
        The data_type of the prep_template

    Returns
    -------
    (SampleTemplate, PrepTemplate)
        The templates created from the QIIME mapping file

    Raises
    ------
    QiitaWareError
        If any of the required QIIME columns (BarcodeSequence,
        LinkerPrimerSequence, Description) is not in the mapping file
    """
    qiime_map = load_template_to_dataframe(fp, index='#SampleID')

    # There are a few columns in the QIIME mapping file that are special and
    # we know how to deal with them
    rename_cols = {
        'BarcodeSequence': 'barcode',
        'LinkerPrimerSequence': 'primer',
        'Description': 'description',
    }

    # The reverse primer column is optional, rename it only if it is there
    if 'ReverseLinkerPrimer' in qiime_map:
        rename_cols['ReverseLinkerPrimer'] = 'reverselinkerprimer'

    missing = set(rename_cols).difference(qiime_map.columns)
    if missing:
        # Sort the names so the message is the same on every run
        raise QiitaWareError(
            "Error generating the templates from the QIIME mapping file. "
            "Missing QIIME mapping file columns: %s"
            % ', '.join(sorted(missing)))

    qiime_map.rename(columns=rename_cols, inplace=True)

    # Fix the casing in the columns that we control
    qiime_map.columns = [c.lower() if c.lower() in CONTROLLED_COLS else c
                         for c in qiime_map.columns]

    # Figure out which columns belong to the prep template
    def _col_iterator(restriction_set):
        for restriction in viewvalues(restriction_set):
            for cols in viewkeys(restriction.columns):
                yield cols

    pt_cols = set(_col_iterator(PREP_TEMPLATE_COLUMNS))

    data_type_str = (convert_from_id(data_type, "data_type")
                     if isinstance(data_type, int) else data_type)

    if data_type_str in TARGET_GENE_DATA_TYPES:
        pt_cols.update(_col_iterator(PREP_TEMPLATE_COLUMNS_TARGET_GENE))
        pt_cols.add('reverselinkerprimer')

    # `.ix` has been removed from pandas and set indexers are rejected by
    # newer releases, so select the columns through `.loc` with a boolean
    # mask. Every column lands in exactly one of the two frames and keeps
    # the position it had in the mapping file
    is_prep_col = qiime_map.columns.isin(pt_cols)
    st_md = qiime_map.loc[:, ~is_prep_col]
    pt_md = qiime_map.loc[:, is_prep_col]

    return (SampleTemplate.create(st_md, study),
            PrepTemplate.create(pt_md, study, data_type))
def generate_new_study_with_preprocessed_data(self):
    """Set up a study with sample/prep information and demultiplexed data

    Returns
    -------
    object
        The result of ``Artifact.create`` for the "Demultiplexed" artifact
        linked to the new prep template
    """
    info = {
        "timeseries_type_id": 1,
        "metadata_complete": True,
        "mixs_compliant": True,
        "number_samples_collected": 3,
        "number_samples_promised": 3,
        "study_alias": "Test EBI",
        "study_description": "Study for testing EBI",
        "study_abstract": "Study for testing EBI",
        "emp_person_id": StudyPerson(2),
        "principal_investigator_id": StudyPerson(3),
        "lab_person_id": StudyPerson(1),
    }
    study = Study.create(
        User("*****@*****.**"), "Test EBI study", [1], info)

    # Sample information: three samples collected on consecutive days
    sample_numbers = (1, 2, 3)
    sample_info = {
        "Sample%d" % num: {
            "collection_timestamp": datetime(2015, 6, num, 7, 0, 0),
            "physical_specimen_location": "location1",
            "taxon_id": 9606,
            "scientific_name": "h**o sapiens",
            "Description": "Test Sample %d" % num,
        }
        for num in sample_numbers
    }
    SampleTemplate.create(
        pd.DataFrame.from_dict(sample_info, orient="index", dtype=str),
        study)

    # Prep information: each sample gets its own barcode and description
    barcode_by_sample = zip(
        sample_numbers, ("CGTAGAGCTCTC", "CGTAGAGCTCTA", "CGTAGAGCTCTT"))
    prep_info = {
        "Sample%d" % num: {
            "primer": "GTGCCAGCMGCCGCGGTAA",
            "barcode": barcode,
            "center_name": "KnightLab",
            "platform": "ILLUMINA",
            "instrument_model": "Illumina MiSeq",
            "library_construction_protocol": "Protocol ABC",
            "experiment_design_description": "Random value %d" % num,
        }
        for num, barcode in barcode_by_sample
    }
    pt = PrepTemplate.create(
        pd.DataFrame.from_dict(prep_info, orient="index", dtype=str),
        study, "16S", "Metagenomics")

    # Dump the FASTA example and convert it to the demux format
    fna_fp = join(self.temp_dir, "seqs.fna")
    demux_fp = join(self.temp_dir, "demux.seqs")
    with open(fna_fp, "w") as fasta:
        fasta.write(FASTA_EXAMPLE_2.format(study.id))
    with File(demux_fp, "w") as demux:
        to_hdf5(fna_fp, demux)

    filepaths = [(demux_fp, 6)]
    return Artifact.create(filepaths, "Demultiplexed", prep_template=pt)
def process_sample_template(self, study, user, callback):
    """Process a sample template from the POST method

    Parameters
    ----------
    study : Study
        The current study object
    user : User
        The current user object (unused in the body of this method)
    callback : function
        The callback function to call with the results once the processing
        is done. It is called with one tuple of the form
        ``(msg, msg_level, None, None, None)``; `msg_level` is "success",
        "warning" or "danger"

    Raises
    ------
    HTTPError
        If the sample template file does not exists or the requested name
        resolves to a path outside of the study's uploads folder
    """
    # Local import: only used for the path check below
    from os.path import abspath

    # If we are on this function, the arguments "sample_template" and
    # "data_type" must be defined. If not, let tornado raise its error
    sample_template = self.get_argument('sample_template')
    data_type = self.get_argument('data_type')

    # Get the uploads folder
    _, base_fp = get_mountpoint("uploads")[0]
    # Get the path of the sample template in the uploads folder
    uploads_dir = join(base_fp, str(study.id))
    fp_rsp = join(uploads_dir, sample_template)

    # The file name is provided by the client, so make sure that it cannot
    # be used to reach (and later remove) files outside of the study's
    # uploads folder, e.g. via "../" or an absolute path. The trailing
    # separator added by join(..., '') avoids matching sibling folders
    # that merely share the same prefix
    allowed_prefix = join(abspath(uploads_dir), '')
    escapes_uploads = not abspath(fp_rsp).startswith(allowed_prefix)

    if escapes_uploads or not exists(fp_rsp):
        # The file does not exist, fail nicely
        raise HTTPError(404, "This file doesn't exist: %s" % fp_rsp)

    # Define here the message and message level in case of success
    msg = "The sample template '%s' has been added" % sample_template
    msg_level = "success"
    is_mapping_file = looks_like_qiime_mapping_file(fp_rsp)

    try:
        if is_mapping_file and not data_type:
            raise ValueError("Please, choose a data type if uploading a "
                             "QIIME mapping file")

        with warnings.catch_warnings(record=True) as warns:
            if is_mapping_file:
                create_templates_from_qiime_mapping_file(
                    fp_rsp, study, int(data_type))
            else:
                SampleTemplate.create(load_template_to_dataframe(fp_rsp),
                                      study)
            remove(fp_rsp)

            # join all the warning messages into one. Note that this
            # info will be ignored if an exception is raised
            if warns:
                msg = '; '.join(
                    [convert_text_html(str(w.message)) for w in warns])
                msg_level = 'warning'

    except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
            QiitaDBDuplicateError, IOError, ValueError, KeyError,
            CParserError, QiitaDBDuplicateHeaderError,
            QiitaDBError, QiitaWareError) as e:
        # Some error occurred while processing the sample template
        # Show the error to the user so they can fix the template
        error_msg = ('parsing the QIIME mapping file'
                     if is_mapping_file
                     else 'parsing the sample template')
        msg = html_error_message % (error_msg, basename(fp_rsp),
                                    str(e))
        msg = convert_text_html(msg)
        msg_level = "danger"

    callback((msg, msg_level, None, None, None))