def setUp(self):
    """Create placeholder fastq files and two fresh prep templates.

    The sequence/barcode files are empty placeholders — only their paths
    and filepath-type ids matter to the tests that consume them.
    """
    fd, self.seqs_fp = mkstemp(suffix='_seqs.fastq')
    close(fd)
    fd, self.barcodes_fp = mkstemp(suffix='_barcodes.fastq')
    close(fd)
    # Magic numbers: filetype id 2 and filepath-type ids 1 (raw seqs) and
    # 2 (raw barcodes) — presumably match the test DB; confirm against
    # qiita.filetype / qiita.filepath_type.
    self.filetype = 2
    self.filepaths = [(self.seqs_fp, 1), (self.barcodes_fp, 2)]
    _, self.db_test_raw_dir = get_mountpoint('raw_data')[0]

    # Give each placeholder file minimal content
    with open(self.seqs_fp, "w") as f:
        f.write("\n")
    with open(self.barcodes_fp, "w") as f:
        f.write("\n")
    self._clean_up_files = []

    # Create some new PrepTemplates (same metadata, two data types)
    metadata_dict = {
        'SKB8.640193': {'center_name': 'ANL',
                        'primer': 'GTGCCAGCMGCCGCGGTAA',
                        'barcode': 'GTCCGCAAGTTA',
                        'run_prefix': "s_G1_L001_sequences",
                        'platform': 'ILLUMINA',
                        'library_construction_protocol': 'AAAA',
                        'experiment_design_description': 'BBBB'}}
    metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')
    self.pt1 = PrepTemplate.create(metadata, Study(1), "16S")
    self.pt2 = PrepTemplate.create(metadata, Study(1), "18S")
    self.prep_templates = [self.pt1, self.pt2]
def test_get_sample_names_by_run_prefix(self):
    """_get_sample_names_by_run_prefix maps each run prefix to a sample id.

    Also checks that a ValueError is raised when two samples share the
    same run_prefix, since the prefix -> sample mapping would no longer
    be unique.
    """
    metadata_dict = {
        'SKB8.640193': {'run_prefix': "s1", 'primer': 'A',
                        'barcode': 'A', 'center_name': 'ANL',
                        'platform': 'ILLUMINA',
                        'instrument_model': 'Illumina MiSeq',
                        'library_construction_protocol': 'A',
                        'experiment_design_description': 'A'},
        'SKD8.640184': {'run_prefix': "s2", 'primer': 'A',
                        'barcode': 'A', 'center_name': 'ANL',
                        'platform': 'ILLUMINA',
                        'instrument_model': 'Illumina MiSeq',
                        'library_construction_protocol': 'A',
                        'experiment_design_description': 'A'},
        'SKB7.640196': {'run_prefix': "s3", 'primer': 'A',
                        'barcode': 'A', 'center_name': 'ANL',
                        'platform': 'ILLUMINA',
                        'instrument_model': 'Illumina MiSeq',
                        'library_construction_protocol': 'A',
                        'experiment_design_description': 'A'}}
    md_template = pd.DataFrame.from_dict(metadata_dict, orient='index')
    prep_template = PrepTemplate.create(md_template, Study(1), '16S')
    for _, fp in prep_template.get_filepaths():
        self.files_to_remove.append(fp)

    obs = _get_sample_names_by_run_prefix(prep_template)
    # Sample ids come back prefixed with the study id (1)
    exp = {'s3': '1.SKB7.640196', 's2': '1.SKD8.640184',
           's1': '1.SKB8.640193'}
    self.assertEqual(obs, exp)

    # This should raise an error: 's1' is used by two different samples
    metadata_dict = {
        'SKB8.640193': {'run_prefix': "s1", 'primer': 'A',
                        'barcode': 'A', 'center_name': 'ANL',
                        'platform': 'ILLUMINA',
                        'instrument_model': 'Illumina MiSeq',
                        'library_construction_protocol': 'A',
                        'experiment_design_description': 'A'},
        'SKD8.640184': {'run_prefix': "s1", 'primer': 'A',
                        'barcode': 'A', 'center_name': 'ANL',
                        'platform': 'ILLUMINA',
                        'instrument_model': 'Illumina MiSeq',
                        'library_construction_protocol': 'A',
                        'experiment_design_description': 'A'},
        'SKB7.640196': {'run_prefix': "s3", 'primer': 'A',
                        'barcode': 'A', 'center_name': 'ANL',
                        'platform': 'ILLUMINA',
                        'instrument_model': 'Illumina MiSeq',
                        'library_construction_protocol': 'A',
                        'experiment_design_description': 'A'}}
    md_template = pd.DataFrame.from_dict(metadata_dict, orient='index')
    prep_template = PrepTemplate.create(md_template, Study(1), '16S')
    for _, fp in prep_template.get_filepaths():
        self.files_to_remove.append(fp)
    with self.assertRaises(ValueError):
        _get_sample_names_by_run_prefix(prep_template)
def remove_add_prep_template(self, fp_rpt, raw_data_id, study, data_type_id,
                             investigation_type, callback):
    """Add a prep template from file, delete the file, then notify.

    Creates a PrepTemplate for the given raw data and study from the
    template file at ``fp_rpt``, removes that file and invokes
    ``callback`` when done.
    """
    template_df = load_template_to_dataframe(fp_rpt)
    raw_data = RawData(raw_data_id)
    PrepTemplate.create(template_df, raw_data, study, int(data_type_id),
                        investigation_type=investigation_type)
    remove(fp_rpt)
    callback()
def remove_add_prep_template(self, fp_rpt, study, data_type_id,
                             investigation_type):
    """Add a prep template from file and return its id.

    The template file at ``fp_rpt`` is deleted after the PrepTemplate
    has been created.
    """
    template_df = load_template_to_dataframe(fp_rpt)
    new_template = PrepTemplate.create(
        template_df, study, _to_int(data_type_id),
        investigation_type=investigation_type)
    remove(fp_rpt)
    return new_template.id
def test_create(self):
    """Creates a new PrepTemplate"""
    pt = PrepTemplate.create(self.metadata, self.new_raw_data)
    # The returned object has the correct id
    self.assertEqual(pt.id, 3)

    # The relevant rows to common_prep_info have been added.
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.common_prep_info WHERE raw_data_id=3")
    # NOTE(review): each expected row carries six values — presumably
    # raw_data_id, sample_id, center_name, center_project_name,
    # emp_status_id, data_type_id; confirm against the schema (the ebi_*
    # accession columns are not present in these rows).
    exp = [[3, 'SKB8.640193', 'ANL', 'Test Project', 1, 2],
           [3, 'SKD8.640184', 'ANL', 'Test Project', 1, 2],
           [3, 'SKB7.640196', 'ANL', 'Test Project', 1, 2]]
    self.assertEqual(sorted(obs), sorted(exp))

    # The relevant rows have been added to the raw_data_prep_columns
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.raw_data_prep_columns WHERE raw_data_id=3")
    # raw_data_id, column_name, column_type
    exp = [[3, 'str_column', 'varchar'],
           [3, 'ebi_submission_accession', 'varchar']]
    self.assertEqual(obs, exp)

    # The new table exists
    self.assertTrue(exists_table("prep_3", self.conn_handler))

    # The new table hosts the correct values
    obs = self.conn_handler.execute_fetchall("SELECT * FROM qiita.prep_3")
    # sample_id, str_column and a third (NULL) column — presumably
    # ebi_submission_accession; confirm against the generated table
    exp = [['SKB8.640193', "Value for sample 1", None],
           ['SKD8.640184', "Value for sample 2", None],
           ['SKB7.640196', "Value for sample 3", None]]
    self.assertEqual(sorted(obs), sorted(exp))
def test_get_preprocess_fastq_cmd_per_sample_FASTQ_failure(self):
    """_get_preprocess_fastq_cmd fails for invalid per_sample_FASTQ input.

    A per-sample FASTQ raw data is built with an extra barcodes file;
    _get_preprocess_fastq_cmd is expected to reject it with ValueError.
    """
    metadata_dict = {
        'SKB8.640193': {'run_prefix': "sample1_failure", 'primer': 'A',
                        'barcode': 'A', 'center_name': 'ANL',
                        'platform': 'ILLUMINA',
                        'library_construction_protocol': 'A',
                        'experiment_design_description': 'A'}}
    md_template = pd.DataFrame.from_dict(metadata_dict, orient='index')
    prep_template = PrepTemplate.create(md_template, Study(1), '16S')

    # This part should fail
    fp1 = self.path_builder('sample1_failure.fastq')
    with open(fp1, 'w') as f:
        f.write('\n')
    self.files_to_remove.append(fp1)
    fp2 = self.path_builder('sample1_failure.barcodes.fastq.gz')
    with open(fp2, 'w') as f:
        f.write('\n')
    self.files_to_remove.append(fp2)
    forward_filepath_id = convert_to_id('raw_forward_seqs',
                                        'filepath_type')
    barcode_filepath_id = convert_to_id('raw_barcodes', 'filepath_type')
    fps = [(fp1, forward_filepath_id), (fp2, barcode_filepath_id)]
    filetype_id = get_filetypes()['per_sample_FASTQ']
    raw_data = RawData.create(filetype_id, [prep_template], fps)
    params = [p for p in list(PreprocessedIlluminaParams.iter())
              if p.name == 'per sample FASTQ defaults'][0]
    with self.assertRaises(ValueError):
        _get_preprocess_fastq_cmd(raw_data, prep_template, params)
def test_create(self):
    """Creates a new PrepTemplate"""
    pt = PrepTemplate.create(self.metadata, self.new_raw_data)
    # The returned object has the correct id
    self.assertEqual(pt.id, 3)

    # The relevant rows to common_prep_info have been added.
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.common_prep_info WHERE raw_data_id=3")
    # raw_data_id, sample_id, center_name, center_project_name,
    # ebi_submission_accession, ebi_study_accession, emp_status_id,
    # data_type_id
    exp = [[3, 'SKB8.640193', 'ANL', 'Test Project', None, None, 1, 2],
           [3, 'SKD8.640184', 'ANL', 'Test Project', None, None, 1, 2],
           [3, 'SKB7.640196', 'ANL', 'Test Project', None, None, 1, 2]]
    self.assertEqual(sorted(obs), sorted(exp))

    # The relevant rows have been added to the raw_data_prep_columns
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.raw_data_prep_columns WHERE raw_data_id=3")
    # raw_data_id, column_name, column_type
    exp = [[3, "str_column", "varchar"]]
    self.assertEqual(obs, exp)

    # The new table exists
    self.assertTrue(exists_table("prep_3", self.conn_handler))

    # The new table hosts the correct values
    obs = self.conn_handler.execute_fetchall(
        "SELECT * FROM qiita.prep_3")
    # sample_id, str_column
    exp = [['SKB8.640193', "Value for sample 1"],
           ['SKD8.640184', "Value for sample 2"],
           ['SKB7.640196', "Value for sample 3"]]
    self.assertEqual(sorted(obs), sorted(exp))
def test_get_qiime_minimal_mapping_multiple(self):
    """_get_qiime_minimal_mapping writes one mapping file per run prefix."""
    # We need to create a prep template in which we have different run
    # prefix values, so we can test this case
    metadata_dict = {
        'SKB8.640193': {'center_name': 'ANL',
                        'center_project_name': 'Test Project',
                        'ebi_submission_accession': None,
                        'EMP_status': 'EMP',
                        'str_column': 'Value for sample 1',
                        'primer': 'GTGCCAGCMGCCGCGGTAA',
                        'barcode': 'GTCCGCAAGTTA',
                        'run_prefix': "s_G1_L001_sequences",
                        'platform': 'ILLUMINA',
                        'library_construction_protocol': 'AAA',
                        'experiment_design_description': 'BBB'},
        'SKD8.640184': {'center_name': 'ANL',
                        'center_project_name': 'Test Project',
                        'ebi_submission_accession': None,
                        'EMP_status': 'EMP',
                        'str_column': 'Value for sample 2',
                        'primer': 'GTGCCAGCMGCCGCGGTAA',
                        'barcode': 'CGTAGAGCTCTC',
                        'run_prefix': "s_G1_L001_sequences",
                        'platform': 'ILLUMINA',
                        'library_construction_protocol': 'AAA',
                        'experiment_design_description': 'BBB'},
        'SKB7.640196': {'center_name': 'ANL',
                        'center_project_name': 'Test Project',
                        'ebi_submission_accession': None,
                        'EMP_status': 'EMP',
                        'str_column': 'Value for sample 3',
                        'primer': 'GTGCCAGCMGCCGCGGTAA',
                        'barcode': 'CCTCTGAGAGCT',
                        'run_prefix': "s_G1_L002_sequences",
                        'platform': 'ILLUMINA',
                        'library_construction_protocol': 'AAA',
                        'experiment_design_description': 'BBB'}
    }
    md_template = pd.DataFrame.from_dict(metadata_dict, orient='index')
    prep_template = PrepTemplate.create(md_template, Study(1), '16S')
    for _, fp in prep_template.get_filepaths():
        self.files_to_remove.append(fp)
    out_dir = mkdtemp()

    obs_fps = sorted(_get_qiime_minimal_mapping(prep_template, out_dir))
    # One file per distinct run_prefix (L001 and L002)
    exp_fps = sorted([join(out_dir, 's_G1_L001_sequences_MMF.txt'),
                      join(out_dir, 's_G1_L002_sequences_MMF.txt')])

    # Check that the returned list is as expected
    self.assertEqual(obs_fps, exp_fps)
    # Check that the file exists
    for fp in exp_fps:
        self.assertTrue(exists(fp))
    # Check the contents of the file
    for fp, contents in zip(exp_fps, [EXP_PREP_1, EXP_PREP_2]):
        with open(fp, "U") as f:
            self.assertEqual(f.read(), contents)
def remove_add_prep_template(self, fp_rpt, raw_data_id, study, data_type_id,
                             investigation_type):
    """Add a prep template from file and return its id.

    Creates a PrepTemplate bound to the given raw data and study from the
    template file at ``fp_rpt``; the file is removed once the template
    exists.
    """
    template_df = load_template_to_dataframe(fp_rpt)
    created = PrepTemplate.create(template_df, RawData(raw_data_id), study,
                                  _to_int(data_type_id),
                                  investigation_type=investigation_type)
    remove(fp_rpt)
    return created.id
def test_to_file(self):
    """to file writes a tab delimited file with all the metadata"""
    fd, fp = mkstemp()
    close(fd)
    self._clean_up_files.append(fp)
    pt = PrepTemplate.create(self.metadata, self.new_raw_data)
    pt.to_file(fp)
    with open(fp, 'U') as obs_f:
        observed = obs_f.read()
    self.assertEqual(observed, EXP_PREP_TEMPLATE)
def test_to_file(self):
    """to file writes a tab delimited file with all the metadata"""
    # Dump the newly-created template to a temp file and compare it
    # against the expected serialization.
    fd, out_fp = mkstemp()
    close(fd)
    template = PrepTemplate.create(self.metadata, self.new_raw_data)
    template.to_file(out_fp)
    self._clean_up_files.append(out_fp)
    with open(out_fp, 'U') as f:
        self.assertEqual(f.read(), EXP_PREP_TEMPLATE)
def test_get_preprocess_fastq_cmd_per_sample_FASTQ(self):
    """Builds the expected split_libraries_fastq.py command line.

    Two per-sample FASTQ files (one plain, one gzipped) are registered as
    raw data; the generated command must list them with the matching
    study-prefixed sample ids and not-barcoded parameters.
    """
    metadata_dict = {
        'SKB8.640193': {'run_prefix': "sample1", 'primer': 'A',
                        'barcode': 'A', 'center_name': 'ANL',
                        'platform': 'ILLUMINA',
                        'instrument_model': 'Illumina MiSeq',
                        'library_construction_protocol': 'A',
                        'experiment_design_description': 'A'},
        'SKD8.640184': {'run_prefix': "sample2", 'primer': 'A',
                        'barcode': 'A', 'center_name': 'ANL',
                        'platform': 'ILLUMINA',
                        'instrument_model': 'Illumina MiSeq',
                        'library_construction_protocol': 'A',
                        'experiment_design_description': 'A'}}
    md_template = pd.DataFrame.from_dict(metadata_dict, orient='index')
    prep_template = PrepTemplate.create(md_template, Study(1), '16S')

    fp1 = self.path_builder('sample1.fastq')
    with open(fp1, 'w') as f:
        f.write('\n')
    self.files_to_remove.append(fp1)
    fp2 = self.path_builder('sample2.fastq.gz')
    with open(fp2, 'w') as f:
        f.write('\n')
    self.files_to_remove.append(fp2)
    filepath_id = convert_to_id('raw_forward_seqs', 'filepath_type')
    fps = [(fp1, filepath_id), (fp2, filepath_id)]

    filetype_id = get_filetypes()['per_sample_FASTQ']
    raw_data = RawData.create(filetype_id, [prep_template], fps)
    params = [p for p in list(PreprocessedIlluminaParams.iter())
              if p.name == 'per sample FASTQ defaults'][0]

    obs_cmd, obs_output_dir = _get_preprocess_fastq_cmd(raw_data,
                                                        prep_template,
                                                        params)

    # The command must reference the raw filepaths in sorted order
    raw_fps = ','.join([fp for _, fp, _ in
                        sorted(raw_data.get_filepaths())])
    exp_cmd = (
        "split_libraries_fastq.py --store_demultiplexed_fastq -i "
        "{} --sample_ids 1.SKB8.640193,1.SKD8.640184 -o {} --barcode_type "
        "not-barcoded --max_bad_run_length 3 --max_barcode_errors 1.5 "
        "--min_per_read_length_fraction 0.75 --phred_quality_threshold 3 "
        "--sequence_max_n 0").format(raw_fps, obs_output_dir)
    self.assertEqual(obs_cmd, exp_cmd)
def test_load_data_from_cmd(self):
    """load_raw_data_cmd registers files and rows, and validates input.

    Checks that one raw_data row, three filepath rows and three
    raw_filepath links are added, and that a ValueError is raised when
    the number of filepath types does not match the number of filepaths.
    """
    filepaths = [self.forward_fp, self.reverse_fp, self.barcodes_fp]
    filepath_types = ['raw_forward_seqs', 'raw_reverse_seqs',
                      'raw_barcodes']
    filetype = 'FASTQ'
    metadata_dict = {
        'SKB8.640193': {'center_name': 'ANL',
                        'primer': 'GTGCCAGCMGCCGCGGTAA',
                        'barcode': 'GTCCGCAAGTTA',
                        'run_prefix': "s_G1_L001_sequences",
                        'platform': 'ILLUMINA',
                        'instrument_model': 'Illumina MiSeq',
                        'library_construction_protocol': 'AAAA',
                        'experiment_design_description': 'BBBB'}}
    metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')
    pt1 = PrepTemplate.create(metadata, Study(1), "16S")
    prep_templates = [pt1.id]

    # Snapshot the row counts so the deltas can be asserted below
    initial_raw_count = get_count('qiita.raw_data')
    initial_fp_count = get_count('qiita.filepath')
    initial_raw_fp_count = get_count('qiita.raw_filepath')

    new = load_raw_data_cmd(filepaths, filepath_types, filetype,
                            prep_templates)
    raw_data_id = new.id
    # Files are copied into the raw_data mountpoint as <id>_<basename>
    self.files_to_remove.append(
        join(self.db_test_raw_dir,
             '%d_%s' % (raw_data_id, basename(self.forward_fp))))
    self.files_to_remove.append(
        join(self.db_test_raw_dir,
             '%d_%s' % (raw_data_id, basename(self.reverse_fp))))
    self.files_to_remove.append(
        join(self.db_test_raw_dir,
             '%d_%s' % (raw_data_id, basename(self.barcodes_fp))))

    self.assertTrue(check_count('qiita.raw_data', initial_raw_count + 1))
    self.assertTrue(check_count('qiita.filepath', initial_fp_count + 3))
    self.assertTrue(check_count('qiita.raw_filepath',
                                initial_raw_fp_count + 3))

    # Ensure that the ValueError is raised when a filepath_type is not
    # provided for each and every filepath
    with self.assertRaises(ValueError):
        load_raw_data_cmd(filepaths, filepath_types[:-1], filetype,
                          prep_templates)
def test_move_filepaths_to_upload_folder(self):
    """move_filepaths_to_upload_folder relocates files to the uploads dir."""
    # setting up test, done here as this is the only test that uses these
    # files
    fd, seqs_fp = mkstemp(suffix="_seqs.fastq")
    close(fd)
    st = Study(1)
    metadata_dict = {
        "SKB8.640193": {
            "center_name": "ANL",
            "primer": "GTGCCAGCMGCCGCGGTAA",
            "barcode": "GTCCGCAAGTTA",
            "run_prefix": "s_G1_L001_sequences",
            "platform": "ILLUMINA",
            "library_construction_protocol": "AAAA",
            "experiment_design_description": "BBBB",
        }
    }
    metadata = pd.DataFrame.from_dict(metadata_dict, orient="index")
    pt = PrepTemplate.create(metadata, Study(1), "16S")
    # Magic numbers 2 and 1 — presumably the filetype and filepath-type
    # ids in the test DB; confirm against qiita.filetype / filepath_type
    rd = RawData.create(2, [pt], [(seqs_fp, 1)])
    filepaths = rd.get_filepaths()

    # deleting reference so we can directly call
    # move_filepaths_to_upload_folder
    for fid, _, _ in filepaths:
        self.conn_handler.execute(
            "DELETE FROM qiita.raw_filepath WHERE filepath_id=%s",
            (fid,))

    # moving filepaths
    move_filepaths_to_upload_folder(st.id, filepaths)

    # check that they do not exist in the old path but do in the new one
    path_for_removal = join(get_mountpoint("uploads")[0][1], str(st.id))
    for _, fp, _ in filepaths:
        self.assertFalse(exists(fp))
        # the "<id>_" prefix is stripped when moved to the upload folder
        new_fp = join(path_for_removal, basename(fp).split("_", 1)[1])
        self.assertTrue(exists(new_fp))
        self.files_to_remove.append(new_fp)
def generate_new_study_with_preprocessed_data(self):
    """Creates a new study up to the processed data for testing

    Builds a Study, a SampleTemplate and a PrepTemplate with three
    samples, writes a demultiplexed HDF5 file from a FASTA fixture and
    returns the resulting PreprocessedData.
    """
    # ignoring warnings generated when adding templates
    simplefilter("ignore")
    info = {
        "timeseries_type_id": 1,
        "metadata_complete": True,
        "mixs_compliant": True,
        "number_samples_collected": 3,
        "number_samples_promised": 3,
        "study_alias": "Test EBI",
        "study_description": "Study for testing EBI",
        "study_abstract": "Study for testing EBI",
        "emp_person_id": StudyPerson(2),
        "principal_investigator_id": StudyPerson(3),
        "lab_person_id": StudyPerson(1)
    }
    study = Study.create(User('*****@*****.**'), "Test EBI study", [1], info)
    metadata_dict = {
        'Sample1': {'collection_timestamp': datetime(2015, 6, 1, 7, 0, 0),
                    'physical_specimen_location': 'location1',
                    'taxon_id': 9606,
                    'scientific_name': 'h**o sapiens',
                    'Description': 'Test Sample 1'},
        'Sample2': {'collection_timestamp': datetime(2015, 6, 2, 7, 0, 0),
                    'physical_specimen_location': 'location1',
                    'taxon_id': 9606,
                    'scientific_name': 'h**o sapiens',
                    'Description': 'Test Sample 2'},
        'Sample3': {'collection_timestamp': datetime(2015, 6, 3, 7, 0, 0),
                    'physical_specimen_location': 'location1',
                    'taxon_id': 9606,
                    'scientific_name': 'h**o sapiens',
                    'Description': 'Test Sample 3'}
    }
    metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')
    SampleTemplate.create(metadata, study)
    metadata_dict = {
        'Sample1': {'primer': 'GTGCCAGCMGCCGCGGTAA',
                    'barcode': 'CGTAGAGCTCTC',
                    'center_name': 'KnightLab',
                    'platform': 'ILLUMINA',
                    'instrument_model': 'Illumina MiSeq',
                    'library_construction_protocol': 'Protocol ABC',
                    'experiment_design_description': "Random value 1"},
        'Sample2': {'primer': 'GTGCCAGCMGCCGCGGTAA',
                    'barcode': 'CGTAGAGCTCTA',
                    'center_name': 'KnightLab',
                    'platform': 'ILLUMINA',
                    'instrument_model': 'Illumina MiSeq',
                    'library_construction_protocol': 'Protocol ABC',
                    'experiment_design_description': "Random value 2"},
        'Sample3': {'primer': 'GTGCCAGCMGCCGCGGTAA',
                    'barcode': 'CGTAGAGCTCTT',
                    'center_name': 'KnightLab',
                    'platform': 'ILLUMINA',
                    'instrument_model': 'Illumina MiSeq',
                    'library_construction_protocol': 'Protocol ABC',
                    'experiment_design_description': "Random value 3"},
    }
    metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')
    pt = PrepTemplate.create(metadata, study, "16S", 'Metagenomics')
    fna_fp = join(self.temp_dir, 'seqs.fna')
    demux_fp = join(self.temp_dir, 'demux.seqs')
    with open(fna_fp, 'w') as f:
        f.write(FASTA_EXAMPLE_2.format(study.id))
    # File is presumably h5py.File — to_hdf5 demultiplexes the FASTA into
    # the HDF5 container; confirm against the module imports
    with File(demux_fp, 'w') as f:
        to_hdf5(fna_fp, f)
    ppd = PreprocessedData.create(
        study, "preprocessed_sequence_illumina_params", 1,
        [(demux_fp, 6)], pt)
    return ppd
def create_templates_from_qiime_mapping_file(fp, study, data_type):
    """Creates a sample template and a prep template from qiime mapping file

    Parameters
    ----------
    fp : str or file-like object
        Path to the QIIME mapping file
    study : Study
        The study to which the sample template belongs to
    data_type : str or int
        The data_type of the prep_template

    Returns
    -------
    (SampleTemplate, PrepTemplate)
        The templates created from the QIIME mapping file

    Raises
    ------
    QiitaWareError
        If any of the required QIIME mapping file columns are missing
    """
    qiime_map = load_template_to_dataframe(fp, index='#SampleID')

    # There are a few columns in the QIIME mapping file that are special and
    # we know how to deal with them
    rename_cols = {
        'BarcodeSequence': 'barcode',
        'LinkerPrimerSequence': 'primer',
        'Description': 'description',
    }

    if 'ReverseLinkerPrimer' in qiime_map:
        rename_cols['ReverseLinkerPrimer'] = 'reverselinkerprimer'

    missing = set(rename_cols).difference(qiime_map.columns)
    if missing:
        raise QiitaWareError(
            "Error generating the templates from the QIIME mapping file. "
            "Missing QIIME mapping file columns: %s" % ', '.join(missing))

    qiime_map.rename(columns=rename_cols, inplace=True)

    # Fix the casing in the columns that we control
    qiime_map.columns = [c.lower() if c.lower() in CONTROLLED_COLS else c
                         for c in qiime_map.columns]

    # Figure out which columns belong to the prep template
    def _col_iterator(restriction_set):
        for restriction in viewvalues(restriction_set):
            for cols in viewkeys(restriction.columns):
                yield cols

    pt_cols = set(col for col in _col_iterator(PREP_TEMPLATE_COLUMNS))

    data_type_str = (convert_from_id(data_type, "data_type")
                     if isinstance(data_type, (int, long)) else data_type)

    # Target-gene data types additionally own the target-gene-specific
    # columns plus the (optional) reverse linker primer
    if data_type_str in TARGET_GENE_DATA_TYPES:
        pt_cols.update(
            col for col in _col_iterator(PREP_TEMPLATE_COLUMNS_TARGET_GENE))
        pt_cols.add('reverselinkerprimer')

    qiime_cols = set(qiime_map.columns)
    pt_cols = qiime_cols.intersection(pt_cols)
    st_cols = qiime_cols.difference(pt_cols)

    # Use .loc instead of the deprecated .ix indexer (removed in pandas
    # 1.0); label-based column selection here is behavior-equivalent.
    # .loc also requires a list-like rather than a set.
    st_md = qiime_map.loc[:, list(st_cols)]
    pt_md = qiime_map.loc[:, list(pt_cols)]

    return (SampleTemplate.create(st_md, study),
            PrepTemplate.create(pt_md, study, data_type))
def test_get_qiime_minimal_mapping_multiple(self):
    """_get_qiime_minimal_mapping writes one mapping file per run prefix."""
    # We need to create a prep template in which we have different run
    # prefix values, so we can test this case
    metadata_dict = {
        'SKB8.640193': {
            'center_name': 'ANL',
            'center_project_name': 'Test Project',
            'ebi_submission_accession': None,
            'EMP_status': 'EMP',
            'str_column': 'Value for sample 1',
            'linkerprimersequence': 'GTGCCAGCMGCCGCGGTAA',
            'barcodesequence': 'GTCCGCAAGTTA',
            'run_prefix': "s_G1_L001_sequences",
            'platform': 'ILLUMINA',
            'library_construction_protocol': 'AAA',
            'experiment_design_description': 'BBB'
        },
        'SKD8.640184': {
            'center_name': 'ANL',
            'center_project_name': 'Test Project',
            'ebi_submission_accession': None,
            'EMP_status': 'EMP',
            'str_column': 'Value for sample 2',
            'linkerprimersequence': 'GTGCCAGCMGCCGCGGTAA',
            'barcodesequence': 'CGTAGAGCTCTC',
            'run_prefix': "s_G1_L001_sequences",
            'platform': 'ILLUMINA',
            'library_construction_protocol': 'AAA',
            'experiment_design_description': 'BBB'
        },
        'SKB7.640196': {
            'center_name': 'ANL',
            'center_project_name': 'Test Project',
            'ebi_submission_accession': None,
            'EMP_status': 'EMP',
            'str_column': 'Value for sample 3',
            'linkerprimersequence': 'GTGCCAGCMGCCGCGGTAA',
            'barcodesequence': 'CCTCTGAGAGCT',
            'run_prefix': "s_G1_L002_sequences",
            'platform': 'ILLUMINA',
            'library_construction_protocol': 'AAA',
            'experiment_design_description': 'BBB'
        }
    }
    md_template = pd.DataFrame.from_dict(metadata_dict, orient='index')
    # NOTE: older PrepTemplate.create signature taking a RawData argument
    prep_template = PrepTemplate.create(md_template, RawData(2),
                                        Study(1), '16S')
    out_dir = mkdtemp()

    obs_fps = sorted(_get_qiime_minimal_mapping(prep_template, out_dir))
    # One file per distinct run_prefix (L001 and L002)
    exp_fps = sorted([
        join(out_dir, 's_G1_L001_sequences_MMF.txt'),
        join(out_dir, 's_G1_L002_sequences_MMF.txt')
    ])

    # Check that the returned list is as expected
    self.assertEqual(obs_fps, exp_fps)
    # Check that the file exists
    for fp in exp_fps:
        self.assertTrue(exists(fp))
    # Check the contents of the file
    for fp, contents in zip(exp_fps, [EXP_PREP_1, EXP_PREP_2]):
        with open(fp, "U") as f:
            self.assertEqual(f.read(), contents)
def setUp(self):
    """Builds SFF raw-data fixtures: plain, gzipped and multi-run-prefix.

    Registers three prep templates against Study(1), each with its own
    RawData, and records every created file/dir for teardown cleanup.
    """
    self.db_dir = get_db_files_base_dir()

    # Create a SFF dataset: add prep template and a RawData
    study = Study(1)
    md_dict = {
        'SKB8.640193': {'center_name': 'ANL',
                        'primer': 'GTGCCAGCMGCCGCGGTAA',
                        'barcode': 'GTCCGCAAGTTA',
                        'run_prefix': "preprocess_test",
                        'platform': 'ILLUMINA',
                        'library_construction_protocol': 'AAAA',
                        'experiment_design_description': 'BBBB'},
        'SKD8.640184': {'center_name': 'ANL',
                        'primer': 'GTGCCAGCMGCCGCGGTAA',
                        'barcode': 'CGTAGAGCTCTC',
                        'run_prefix': "preprocess_test",
                        'platform': 'ILLUMINA',
                        'library_construction_protocol': 'AAAA',
                        'experiment_design_description': 'BBBB'},
        'SKB7.640196': {'center_name': 'ANL',
                        'primer': 'GTGCCAGCMGCCGCGGTAA',
                        'barcode': 'CCTCTGAGAGCT',
                        'run_prefix': "preprocess_test",
                        'platform': 'ILLUMINA',
                        'library_construction_protocol': 'AAAA',
                        'experiment_design_description': 'BBBB'}
    }
    md = pd.DataFrame.from_dict(md_dict, orient='index')
    self.sff_prep_template = PrepTemplate.create(md, study, "16S")

    tmp_dir = mkdtemp()
    self.path_builder = partial(join, tmp_dir)
    fp1 = self.path_builder('preprocess_test1.sff')
    with open(fp1, 'w') as f:
        f.write('\n')
    fp2 = self.path_builder('preprocess_test2.sff')
    with open(fp2, 'w') as f:
        f.write('\n')
    self.raw_sff_id = convert_to_id('raw_sff', 'filepath_type')
    fps = [(fp1, self.raw_sff_id), (fp2, self.raw_sff_id)]

    # Magic number 1: is the filetype id
    self.raw_data = RawData.create(1, [self.sff_prep_template], fps)

    # Gzipped variant of the same dataset
    md = pd.DataFrame.from_dict(md_dict, orient='index')
    self.sff_prep_template_gz = PrepTemplate.create(md, study, "16S")
    fp1_gz = self.path_builder('preprocess_test1.sff.gz')
    with gzip.open(fp1_gz, 'w') as f:
        f.write('\n')
    fps = [(fp1_gz, self.raw_sff_id)]
    self.raw_data_gz = RawData.create(1, [self.sff_prep_template_gz], fps)

    # Create a SFF dataset with multiple run prefix:
    # add prep template and a RawData
    md_dict['SKD8.640184']['run_prefix'] = "new"
    md_rp = pd.DataFrame.from_dict(md_dict, orient='index')
    self.sff_prep_template_rp = PrepTemplate.create(md_rp, study, "16S")
    # NOTE(review): these paths collide with fp1/fp2 above (same tmp_dir
    # and names), so the earlier files are overwritten — confirm intended.
    rp_fp1 = self.path_builder('preprocess_test1.sff')
    with open(rp_fp1, 'w') as f:
        f.write('\n')
    rp_fp2 = self.path_builder('preprocess_test2.sff')
    with open(rp_fp2, 'w') as f:
        f.write('\n')
    fps = [(rp_fp1, self.raw_sff_id), (rp_fp2, self.raw_sff_id)]

    # Magic number 1: is the filetype id
    self.raw_data_rp = RawData.create(1, [self.sff_prep_template_rp], fps)

    # Make sure that we clean up all created files.
    # Fix: fp1_gz and the gz prep template's filepaths were previously
    # omitted from the cleanup lists and leaked after each test run.
    self.files_to_remove = [fp1, fp2, fp1_gz, rp_fp1, rp_fp2]
    self.dirs_to_remove = [tmp_dir]
    for pt in [self.sff_prep_template, self.sff_prep_template_gz,
               self.sff_prep_template_rp]:
        for _, fp in pt.get_filepaths():
            self.files_to_remove.append(fp)
def test_create_duplicate(self):
    """Create raises an error when creating a duplicated PrepTemplate"""
    # The template for this raw data already exists in the test DB
    duplicated_md = self.metadata
    with self.assertRaises(QiitaDBDuplicateError):
        PrepTemplate.create(duplicated_md, self.test_raw_data)
def test_create_duplicate_header(self):
    """Create raises an error when duplicate headers are present"""
    # 'STR_COLUMN' lower-cases to an already-present header
    dup_col = pd.Series(['', '', ''], index=self.metadata.index)
    self.metadata['STR_COLUMN'] = dup_col
    with self.assertRaises(QiitaDBDuplicateHeaderError):
        PrepTemplate.create(self.metadata, self.new_raw_data)
def test_create_duplicate(self):
    """Create raises an error when creating a duplicated PrepTemplate"""
    # Re-creating the template for existing raw data must be rejected
    existing_raw_data = self.test_raw_data
    with self.assertRaises(QiitaDBDuplicateError):
        PrepTemplate.create(self.metadata, existing_raw_data)
def test_create_duplicate_header(self):
    """Create raises an error when duplicate headers are present"""
    # Adding 'STR_COLUMN' collides (case-insensitively) with an existing
    # column, so create must fail
    blank_values = ['', '', '']
    self.metadata['STR_COLUMN'] = pd.Series(blank_values,
                                            index=self.metadata.index)
    with self.assertRaises(QiitaDBDuplicateHeaderError):
        PrepTemplate.create(self.metadata, self.new_raw_data)