def remove_add_study_template(self, raw_data, study_id, fp_rsp):
    """Swap a study's prep templates and sample template for a new one

    Parameters
    ----------
    raw_data : callable
        Called without arguments; every item it yields is wrapped in
        ``RawData`` to reach its prep templates
    study_id
        Identifier forwarded to ``SampleTemplate.exists``,
        ``SampleTemplate.delete`` and ``Study``
    fp_rsp : str
        Path of the file holding the new sample template. The file is
        removed after the new template has been created
    """
    # Delete every prep template still hanging from the raw data
    for raw_data_id in raw_data():
        for prep_id in RawData(raw_data_id).prep_templates:
            if PrepTemplate.exists(prep_id):
                PrepTemplate.delete(prep_id)

    # Delete the current sample template, if there is one
    if SampleTemplate.exists(study_id):
        SampleTemplate.delete(study_id)

    # Create the replacement from the file and then discard the file
    new_template = load_template_to_dataframe(fp_rsp)
    SampleTemplate.create(new_template, Study(study_id))
    remove(fp_rsp)
def test_to_file(self):
    """to file writes a tab delimited file with all the metadata"""
    fd, fp = mkstemp()
    close(fd)
    # mkstemp leaves deletion to the caller, so register the path for clean
    # up right away: if creating or writing the template fails below, the
    # temporary file is still tracked (presumably removed in tearDown)
    self._clean_up_files.append(fp)

    st = SampleTemplate.create(self.metadata, self.new_study)
    st.to_file(fp)

    with open(fp, 'U') as f:
        obs = f.read()
    self.assertEqual(obs, EXP_SAMPLE_TEMPLATE)
def remove_add_study_template(self, raw_data, study_id, fp_rsp, data_type,
                              is_mapping_file):
    """Replace prep templates, raw data, and sample template with a new one

    Parameters
    ----------
    raw_data : callable
        Called without arguments; every item it yields is wrapped in
        ``RawData`` to reach its prep templates
    study_id
        Identifier forwarded to ``SampleTemplate.exists``,
        ``SampleTemplate.delete`` and ``Study``
    fp_rsp : str
        Path of the uploaded template file. The file is removed after the
        new template(s) have been created
    data_type : str or int
        Data type of the prep template. Only used, converted with ``int``,
        when `is_mapping_file` is True
    is_mapping_file : bool
        If True, `fp_rsp` is handled as a QIIME mapping file and both a
        sample template and a prep template are created from it

    Raises
    ------
    ValueError
        If `is_mapping_file` is True and `data_type` is None or empty
    """
    # Validate before anything is deleted. None has to be rejected together
    # with the empty string: otherwise int(None) fails only after the
    # existing templates are already gone
    if is_mapping_file and (data_type is None or data_type == ""):
        raise ValueError("Please, choose a data type if uploading a QIIME "
                         "mapping file")

    # Delete every prep template still hanging from the raw data
    for rd in raw_data():
        rd = RawData(rd)
        for pt in rd.prep_templates:
            if PrepTemplate.exists(pt):
                PrepTemplate.delete(pt)

    # Delete the current sample template, if there is one
    if SampleTemplate.exists(study_id):
        SampleTemplate.delete(study_id)

    if is_mapping_file:
        create_templates_from_qiime_mapping_file(fp_rsp, Study(study_id),
                                                 int(data_type))
    else:
        SampleTemplate.create(load_template_to_dataframe(fp_rsp),
                              Study(study_id))

    # The uploaded file is no longer needed
    remove(fp_rsp)
def test_create(self):
    """Creates a new SampleTemplate"""
    new_st = SampleTemplate.create(self.metadata, self.new_study)
    # The new template is handed the expected id
    self.assertEqual(new_st.id, 2)

    # required_sample_info must now hold one row per sample
    required_rows = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.required_sample_info WHERE study_id=2")
    collected_on = datetime(2014, 5, 29, 12, 24, 51)
    # Row layout: study_id, sample_id, physical_location,
    # has_physical_specimen, has_extracted_data, sample_type,
    # required_sample_info_status_id, collection_timestamp,
    # host_subject_id, description, plus two trailing numeric columns
    # (presumably latitude and longitude - verify against self.metadata)
    expected_required = [
        [2, "Sample1", "location1", True, True, "type1", 1, collected_on,
         "NotIdentified", "Test Sample 1", 42.42, 41.41],
        [2, "Sample2", "location1", True, True, "type1", 1, collected_on,
         "NotIdentified", "Test Sample 2", 4.2, 1.1],
        [2, "Sample3", "location1", True, True, "type1", 1, collected_on,
         "NotIdentified", "Test Sample 3", 4.8, 4.41],
    ]
    self.assertEqual(required_rows, expected_required)

    # study_sample_columns must list the study specific column
    column_rows = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.study_sample_columns WHERE study_id=2")
    # Row layout: study_id, column_name, column_type
    self.assertEqual(column_rows, [[2, "str_column", "varchar"]])

    # The study specific table has been created ...
    self.assertTrue(exists_table("sample_2", self.conn_handler))

    # ... and it is populated with the expected values
    dynamic_rows = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.sample_2")
    # Row layout: sample_id, str_column
    expected_dynamic = [
        ['Sample1', "Value for sample 1"],
        ['Sample2', "Value for sample 2"],
        ['Sample3', "Value for sample 3"],
    ]
    self.assertEqual(dynamic_rows, expected_dynamic)
def test_create(self):
    """Creates a new SampleTemplate"""
    new_st = SampleTemplate.create(self.metadata, self.new_study)
    # The new template is handed the expected id
    self.assertEqual(new_st.id, 2)

    # required_sample_info must now hold one row per sample
    required_rows = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.required_sample_info WHERE study_id=2")
    collected_on = datetime(2014, 5, 29, 12, 24, 51)
    # Row layout: study_id, sample_id, physical_location,
    # has_physical_specimen, has_extracted_data, sample_type,
    # required_sample_info_status_id, collection_timestamp,
    # host_subject_id, description
    expected_required = [
        [2, "Sample1", "location1", True, True, "type1", 1, collected_on,
         "NotIdentified", "Test Sample 1"],
        [2, "Sample2", "location1", True, True, "type1", 1, collected_on,
         "NotIdentified", "Test Sample 2"],
        [2, "Sample3", "location1", True, True, "type1", 1, collected_on,
         "NotIdentified", "Test Sample 3"],
    ]
    self.assertEqual(required_rows, expected_required)

    # study_sample_columns must list the study specific column
    column_rows = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.study_sample_columns WHERE study_id=2")
    # Row layout: study_id, column_name, column_type
    self.assertEqual(column_rows, [[2, "str_column", "varchar"]])

    # The study specific table has been created ...
    self.assertTrue(exists_table("sample_2", self.conn_handler))

    # ... and it is populated with the expected values
    dynamic_rows = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.sample_2")
    # Row layout: sample_id, str_column
    expected_dynamic = [
        ['Sample1', "Value for sample 1"],
        ['Sample2', "Value for sample 2"],
        ['Sample3', "Value for sample 3"],
    ]
    self.assertEqual(dynamic_rows, expected_dynamic)
def test_retrieve_dropped_samples(self):
    """dropped_samples reports the samples left out of the biom tables"""
    # A second study is needed, so build and populate one
    study_info = {
        "timeseries_type_id": 1,
        "metadata_complete": True,
        "mixs_compliant": True,
        "number_samples_collected": 25,
        "number_samples_promised": 28,
        "portal_type_id": 3,
        "study_alias": "FCM",
        "study_description": "Microbiome of people who eat nothing but "
                             "fried chicken",
        "study_abstract": "Exploring how a high fat diet changes the "
                          "gut microbiome",
        "emp_person_id": StudyPerson(2),
        "principal_investigator_id": StudyPerson(3),
        "lab_person_id": StudyPerson(1)
    }
    # All three samples share the same collection timestamp
    collected_on = datetime(2014, 5, 29, 12, 24, 51)
    sample_md = {
        'SKB8.640193': {
            'physical_location': 'location1',
            'has_physical_specimen': True,
            'has_extracted_data': True,
            'sample_type': 'type1',
            'required_sample_info_status': 'received',
            'collection_timestamp': collected_on,
            'host_subject_id': 'NotIdentified',
            'Description': 'Test Sample 1',
            'str_column': 'Value for sample 1',
            'latitude': 42.42,
            'longitude': 41.41},
        'SKD8.640184': {
            'physical_location': 'location1',
            'has_physical_specimen': True,
            'has_extracted_data': True,
            'sample_type': 'type1',
            'required_sample_info_status': 'received',
            'collection_timestamp': collected_on,
            'host_subject_id': 'NotIdentified',
            'Description': 'Test Sample 2',
            'str_column': 'Value for sample 2',
            'latitude': 4.2,
            'longitude': 1.1},
        'SKB7.640196': {
            'physical_location': 'location1',
            'has_physical_specimen': True,
            'has_extracted_data': True,
            'sample_type': 'type1',
            'required_sample_info_status': 'received',
            'collection_timestamp': collected_on,
            'host_subject_id': 'NotIdentified',
            'Description': 'Test Sample 3',
            'str_column': 'Value for sample 3',
            'latitude': 4.8,
            'longitude': 4.41},
    }
    sample_df = pd.DataFrame.from_dict(sample_md, orient='index')

    Study.create(User("*****@*****.**"), "Test study 2", [1], study_info)
    SampleTemplate.create(sample_df, Study(2))

    # Attach a processed data object to the second study
    processed_dir = get_mountpoint("processed_data")[0][1]
    biom_fp = join(processed_dir,
                   "2_study_1001_closed_reference_otu_table.biom")
    ProcessedData.create("processed_params_uclust", 1, [(biom_fp, 6)],
                         study=Study(2), data_type="16S")

    # Add the three samples of the second study to analysis 1
    self.conn_handler.execute(
        "INSERT INTO qiita.analysis_sample (analysis_id, "
        "processed_data_id, sample_id) VALUES "
        "(1,2,'2.SKB8.640193'), (1,2,'2.SKD8.640184'), "
        "(1,2,'2.SKB7.640196')")

    # Only a subset of the samples is requested for processed data 2
    requested = {
        1: ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196'],
        2: ['2.SKB8.640193', '2.SKD8.640184'],
    }
    self.analysis._build_biom_tables(requested, 10000,
                                     conn_handler=self.conn_handler)

    expected_dropped = {1: {'1.SKM4.640180', '1.SKM9.640192'},
                        2: {'2.SKB7.640196'}}
    self.assertEqual(self.analysis.dropped_samples, expected_dropped)
def process_sample_template(self, study, user, callback):
    """Process a sample template from the POST method

    Parameters
    ----------
    study : Study
        The current study object
    user : User
        The current user object
    callback : function
        The callback function to call with the results once the processing
        is done. It receives a single 5-tuple whose first two items are the
        message and the message level; the remaining three are None

    Raises
    ------
    HTTPError
        If the sample template file does not exist
    """
    # "sample_template" and "data_type" are mandatory here; when one of
    # them is missing, tornado is left to raise its own error
    sample_template = self.get_argument('sample_template')
    data_type = self.get_argument('data_type')

    # Locate the file inside the uploads folder of the study
    _, uploads_dir = get_mountpoint("uploads")[0]
    fp_rsp = join(uploads_dir, str(study.id), sample_template)

    if not exists(fp_rsp):
        # Nothing to process: report it as a 404
        raise HTTPError(404, "This file doesn't exist: %s" % fp_rsp)

    # Message and level reported when everything goes fine
    msg = "The sample template '%s' has been added" % sample_template
    msg_level = "success"

    is_mapping_file = looks_like_qiime_mapping_file(fp_rsp)

    try:
        if is_mapping_file and not data_type:
            raise ValueError("Please, choose a data type if uploading a "
                             "QIIME mapping file")

        with warnings.catch_warnings(record=True) as warns:
            if not is_mapping_file:
                template_df = load_template_to_dataframe(fp_rsp)
                SampleTemplate.create(template_df, study)
            else:
                create_templates_from_qiime_mapping_file(
                    fp_rsp, study, int(data_type))
            remove(fp_rsp)

            # Collapse the recorded warnings into a single message. This
            # point is never reached if an exception was raised above, so
            # in that case the warnings are not reported
            if warns:
                msg = '; '.join(convert_text_html(str(w.message))
                                for w in warns)
                msg_level = 'warning'
    except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
            QiitaDBDuplicateError, IOError, ValueError, KeyError,
            CParserError, QiitaDBDuplicateHeaderError, QiitaDBError,
            QiitaWareError) as e:
        # The template could not be processed: show the error to the user
        # so they can fix the file
        if is_mapping_file:
            error_msg = 'parsing the QIIME mapping file'
        else:
            error_msg = 'parsing the sample template'
        msg = convert_text_html(
            html_error_message % (error_msg, basename(fp_rsp), str(e)))
        msg_level = "danger"

    callback((msg, msg_level, None, None, None))
def create_templates_from_qiime_mapping_file(fp, study, data_type):
    """Creates a sample template and a prep template from qiime mapping file

    Parameters
    ----------
    fp : str or file-like object
        Path to the QIIME mapping file
    study : Study
        The study to which the sample template belongs to
    data_type : str or int
        The data_type of the prep_template

    Returns
    -------
    (SampleTemplate, PrepTemplate)
        The templates created from the QIIME mapping file

    Raises
    ------
    QiitaWareError
        If any of the QIIME columns that get renamed is missing from the
        mapping file
    """
    def _restriction_columns(restriction_set):
        # Every column name declared by any restriction of the set
        return (col
                for restriction in viewvalues(restriction_set)
                for col in viewkeys(restriction.columns))

    qiime_map = load_template_to_dataframe(fp, index='#SampleID')

    # QIIME specific columns that are renamed to the names used here
    rename_cols = {
        'BarcodeSequence': 'barcode',
        'LinkerPrimerSequence': 'primer',
        'Description': 'description',
    }
    # The reverse primer is only renamed when the file provides it
    if 'ReverseLinkerPrimer' in qiime_map:
        rename_cols['ReverseLinkerPrimer'] = 'reverselinkerprimer'

    missing = set(rename_cols).difference(qiime_map.columns)
    if missing:
        raise QiitaWareError(
            "Error generating the templates from the QIIME mapping file. "
            "Missing QIIME mapping file columns: %s" % ', '.join(missing))

    qiime_map.rename(columns=rename_cols, inplace=True)

    # Lowercase the controlled columns; any other header is left untouched
    fixed_headers = []
    for header in qiime_map.columns:
        lowered = header.lower()
        fixed_headers.append(lowered if lowered in CONTROLLED_COLS
                             else header)
    qiime_map.columns = fixed_headers

    # Columns that belong to the prep template for this data type
    prep_candidates = set(_restriction_columns(PREP_TEMPLATE_COLUMNS))

    if isinstance(data_type, (int, long)):
        data_type_str = convert_from_id(data_type, "data_type")
    else:
        data_type_str = data_type

    if data_type_str in TARGET_GENE_DATA_TYPES:
        prep_candidates.update(
            _restriction_columns(PREP_TEMPLATE_COLUMNS_TARGET_GENE))
        prep_candidates.add('reverselinkerprimer')

    # Split the columns present in the file between both templates
    qiime_cols = set(qiime_map.columns)
    pt_cols = qiime_cols.intersection(prep_candidates)
    st_cols = qiime_cols.difference(pt_cols)

    st_md = qiime_map.ix[:, st_cols]
    pt_md = qiime_map.ix[:, pt_cols]

    # The sample template is created before the prep template
    sample_template = SampleTemplate.create(st_md, study)
    prep_template = PrepTemplate.create(pt_md, study, data_type)
    return (sample_template, prep_template)
def test_create_duplicate_header(self):
    """Create raises an error when duplicate headers are present"""
    # Add a 'STR_COLUMN' header filled with empty strings (presumably it
    # clashes with an existing 'str_column' header - verify in setUp)
    blank_values = pd.Series(['', '', ''], index=self.metadata.index)
    self.metadata['STR_COLUMN'] = blank_values

    self.assertRaises(QiitaDBDuplicateHeaderError, SampleTemplate.create,
                      self.metadata, self.new_study)
def test_create_duplicate(self):
    """Create raises an error when creating a duplicated SampleTemplate"""
    # self.test_study presumably already owns a sample template
    self.assertRaises(QiitaDBDuplicateError, SampleTemplate.create,
                      self.metadata, self.test_study)
def test_retrieve_dropped_samples(self):
    """dropped_samples reports the samples left out of the biom tables"""
    # A second study is needed, so build and populate one
    study_info = {
        "timeseries_type_id": 1,
        "metadata_complete": True,
        "mixs_compliant": True,
        "number_samples_collected": 25,
        "number_samples_promised": 28,
        "portal_type_id": 3,
        "study_alias": "FCM",
        "study_description": "Microbiome of people who eat nothing but "
                             "fried chicken",
        "study_abstract": "Exploring how a high fat diet changes the "
                          "gut microbiome",
        "emp_person_id": StudyPerson(2),
        "principal_investigator_id": StudyPerson(3),
        "lab_person_id": StudyPerson(1)
    }
    # All three samples share the same collection timestamp
    collected_on = datetime(2014, 5, 29, 12, 24, 51)
    sample_md = {
        'SKB8.640193': {
            'physical_location': 'location1',
            'has_physical_specimen': True,
            'has_extracted_data': True,
            'sample_type': 'type1',
            'required_sample_info_status': 'received',
            'collection_timestamp': collected_on,
            'host_subject_id': 'NotIdentified',
            'Description': 'Test Sample 1',
            'str_column': 'Value for sample 1',
            'latitude': 42.42,
            'longitude': 41.41},
        'SKD8.640184': {
            'physical_location': 'location1',
            'has_physical_specimen': True,
            'has_extracted_data': True,
            'sample_type': 'type1',
            'required_sample_info_status': 'received',
            'collection_timestamp': collected_on,
            'host_subject_id': 'NotIdentified',
            'Description': 'Test Sample 2',
            'str_column': 'Value for sample 2',
            'latitude': 4.2,
            'longitude': 1.1},
        'SKB7.640196': {
            'physical_location': 'location1',
            'has_physical_specimen': True,
            'has_extracted_data': True,
            'sample_type': 'type1',
            'required_sample_info_status': 'received',
            'collection_timestamp': collected_on,
            'host_subject_id': 'NotIdentified',
            'Description': 'Test Sample 3',
            'str_column': 'Value for sample 3',
            'latitude': 4.8,
            'longitude': 4.41},
    }
    sample_df = pd.DataFrame.from_dict(sample_md, orient='index')

    Study.create(User("*****@*****.**"), "Test study 2", [1], study_info)
    SampleTemplate.create(sample_df, Study(2))

    # Attach a processed data object to the second study
    processed_dir = get_mountpoint("processed_data")[0][1]
    biom_fp = join(processed_dir,
                   "2_study_1001_closed_reference_otu_table.biom")
    ProcessedData.create("processed_params_uclust", 1, [(biom_fp, 6)],
                         study=Study(2), data_type="16S")

    # Add the three samples of the second study to analysis 1
    self.conn_handler.execute(
        "INSERT INTO qiita.analysis_sample (analysis_id, "
        "processed_data_id, sample_id) VALUES "
        "(1,2,'2.SKB8.640193'), (1,2,'2.SKD8.640184'), "
        "(1,2,'2.SKB7.640196')")

    # Only a subset of the samples is requested for processed data 2
    requested = {
        1: ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196'],
        2: ['2.SKB8.640193', '2.SKD8.640184'],
    }
    self.analysis._build_biom_tables(requested, 10000,
                                     conn_handler=self.conn_handler)

    expected_dropped = {1: {'1.SKM4.640180', '1.SKM9.640192'},
                        2: {'2.SKB7.640196'}}
    self.assertEqual(self.analysis.dropped_samples, expected_dropped)
def generate_new_study_with_preprocessed_data(self):
    """Creates a new study up to the processed data for testing

    Returns
    -------
    The object returned by ``PreprocessedData.create`` for the new study
    """
    # Silence the warnings raised while the templates are added
    simplefilter("ignore")

    study_info = {
        "timeseries_type_id": 1,
        "metadata_complete": True,
        "mixs_compliant": True,
        "number_samples_collected": 3,
        "number_samples_promised": 3,
        "study_alias": "Test EBI",
        "study_description": "Study for testing EBI",
        "study_abstract": "Study for testing EBI",
        "emp_person_id": StudyPerson(2),
        "principal_investigator_id": StudyPerson(3),
        "lab_person_id": StudyPerson(1)
    }
    study = Study.create(User('*****@*****.**'), "Test EBI study", [1],
                         study_info)

    # Sample template with three samples
    sample_md = {
        'Sample1': {
            'collection_timestamp': datetime(2015, 6, 1, 7, 0, 0),
            'physical_specimen_location': 'location1',
            'taxon_id': 9606,
            'scientific_name': 'h**o sapiens',
            'Description': 'Test Sample 1'},
        'Sample2': {
            'collection_timestamp': datetime(2015, 6, 2, 7, 0, 0),
            'physical_specimen_location': 'location1',
            'taxon_id': 9606,
            'scientific_name': 'h**o sapiens',
            'Description': 'Test Sample 2'},
        'Sample3': {
            'collection_timestamp': datetime(2015, 6, 3, 7, 0, 0),
            'physical_specimen_location': 'location1',
            'taxon_id': 9606,
            'scientific_name': 'h**o sapiens',
            'Description': 'Test Sample 3'}
    }
    SampleTemplate.create(
        pd.DataFrame.from_dict(sample_md, orient='index'), study)

    # Prep template for the same three samples
    prep_md = {
        'Sample1': {
            'primer': 'GTGCCAGCMGCCGCGGTAA',
            'barcode': 'CGTAGAGCTCTC',
            'center_name': 'KnightLab',
            'platform': 'ILLUMINA',
            'instrument_model': 'Illumina MiSeq',
            'library_construction_protocol': 'Protocol ABC',
            'experiment_design_description': "Random value 1"},
        'Sample2': {
            'primer': 'GTGCCAGCMGCCGCGGTAA',
            'barcode': 'CGTAGAGCTCTA',
            'center_name': 'KnightLab',
            'platform': 'ILLUMINA',
            'instrument_model': 'Illumina MiSeq',
            'library_construction_protocol': 'Protocol ABC',
            'experiment_design_description': "Random value 2"},
        'Sample3': {
            'primer': 'GTGCCAGCMGCCGCGGTAA',
            'barcode': 'CGTAGAGCTCTT',
            'center_name': 'KnightLab',
            'platform': 'ILLUMINA',
            'instrument_model': 'Illumina MiSeq',
            'library_construction_protocol': 'Protocol ABC',
            'experiment_design_description': "Random value 3"},
    }
    prep_template = PrepTemplate.create(
        pd.DataFrame.from_dict(prep_md, orient='index'), study, "16S",
        'Metagenomics')

    # Write the example sequences and convert them into the demux file
    fna_fp = join(self.temp_dir, 'seqs.fna')
    demux_fp = join(self.temp_dir, 'demux.seqs')
    with open(fna_fp, 'w') as fna_file:
        fna_file.write(FASTA_EXAMPLE_2.format(study.id))
    with File(demux_fp, 'w') as demux_file:
        to_hdf5(fna_fp, demux_file)

    return PreprocessedData.create(
        study, "preprocessed_sequence_illumina_params", 1,
        [(demux_fp, 6)], prep_template)