def test_get_qiime_minimal_mapping_single_reverse_primer(self):
    # Exercise _get_qiime_minimal_mapping on prep template 1 after its
    # tables have been altered to carry a 'reverselinkerprimer' column and
    # no 'run_prefix' column. A single 'prep_1_MMF.txt' file whose contents
    # equal EXP_PREP_RLP is expected.
    from shutil import rmtree

    conn_handler = SQLConnectionHandler()
    # Adjacent literals are concatenated into one statement string; the
    # reverse primer value itself is passed as a bound parameter.
    sql = (
        "INSERT INTO qiita.prep_columns "
        "(prep_template_id, column_name, column_type) "
        "VALUES (1, 'reverselinkerprimer', 'varchar'); "
        "ALTER TABLE qiita.prep_1 ADD COLUMN reverselinkerprimer varchar; "
        "DELETE FROM qiita.prep_columns "
        "WHERE prep_template_id = 1 AND column_name = 'run_prefix'; "
        "ALTER TABLE qiita.prep_1 DROP COLUMN run_prefix; "
        "UPDATE qiita.prep_1 SET reverselinkerprimer = %s "
    )
    conn_handler.execute(sql, ('GTGCCAGCM',))

    prep_template = PrepTemplate(1)
    prep_template.generate_files()

    out_dir = mkdtemp()
    # mkdtemp() never deletes what it creates, so remove the directory (and
    # the mapping files written into it) once the test has finished.
    self.addCleanup(rmtree, out_dir, ignore_errors=True)

    obs_fps = _get_qiime_minimal_mapping(prep_template, out_dir)
    exp_fps = [join(out_dir, 'prep_1_MMF.txt')]

    # Check that the returned list is as expected
    self.assertEqual(obs_fps, exp_fps)

    # Check that the file exists
    self.assertTrue(exists(exp_fps[0]))

    # Check the contents of the file
    # NOTE(review): "U" (universal newlines) is a Python 2 era open mode
    # that recent Python 3 releases reject; revisit if this suite is ported.
    with open(exp_fps[0], "U") as f:
        self.assertEqual(f.read(), EXP_PREP_RLP)
def test_get_qiime_minimal_mapping_multiple(self):
    # We need to create a prep template in which we have different run
    # prefix values, so we can test this case: one mapping file per run
    # prefix is expected.
    # NOTE(review): this file contains a second definition named
    # test_get_qiime_minimal_mapping_multiple further down. If both live in
    # the same class, the later one replaces this one and this test never
    # runs -- confirm and rename one of them.
    from shutil import rmtree

    metadata_dict = {
        'SKB8.640193': {'center_name': 'ANL',
                        'center_project_name': 'Test Project',
                        'ebi_submission_accession': None,
                        'EMP_status': 'EMP',
                        'str_column': 'Value for sample 1',
                        'primer': 'GTGCCAGCMGCCGCGGTAA',
                        'barcode': 'GTCCGCAAGTTA',
                        'run_prefix': "s_G1_L001_sequences",
                        'platform': 'ILLUMINA',
                        'library_construction_protocol': 'AAA',
                        'experiment_design_description': 'BBB'},
        'SKD8.640184': {'center_name': 'ANL',
                        'center_project_name': 'Test Project',
                        'ebi_submission_accession': None,
                        'EMP_status': 'EMP',
                        'str_column': 'Value for sample 2',
                        'primer': 'GTGCCAGCMGCCGCGGTAA',
                        'barcode': 'CGTAGAGCTCTC',
                        'run_prefix': "s_G1_L001_sequences",
                        'platform': 'ILLUMINA',
                        'library_construction_protocol': 'AAA',
                        'experiment_design_description': 'BBB'},
        'SKB7.640196': {'center_name': 'ANL',
                        'center_project_name': 'Test Project',
                        'ebi_submission_accession': None,
                        'EMP_status': 'EMP',
                        'str_column': 'Value for sample 3',
                        'primer': 'GTGCCAGCMGCCGCGGTAA',
                        'barcode': 'CCTCTGAGAGCT',
                        'run_prefix': "s_G1_L002_sequences",
                        'platform': 'ILLUMINA',
                        'library_construction_protocol': 'AAA',
                        'experiment_design_description': 'BBB'}
    }
    md_template = pd.DataFrame.from_dict(metadata_dict, orient='index')
    prep_template = PrepTemplate.create(md_template, Study(1), '16S')

    # Register every file attached to the new prep template for removal.
    for _, fp in prep_template.get_filepaths():
        self.files_to_remove.append(fp)

    out_dir = mkdtemp()
    # mkdtemp() never deletes what it creates, so remove the directory (and
    # the mapping files written into it) once the test has finished.
    self.addCleanup(rmtree, out_dir, ignore_errors=True)

    # Both lists are sorted so the comparison does not depend on the order
    # in which the mapping files are returned.
    obs_fps = sorted(_get_qiime_minimal_mapping(prep_template, out_dir))
    exp_fps = sorted([join(out_dir, 's_G1_L001_sequences_MMF.txt'),
                      join(out_dir, 's_G1_L002_sequences_MMF.txt')])

    # Check that the returned list is as expected
    self.assertEqual(obs_fps, exp_fps)

    # Check that the file exists
    for fp in exp_fps:
        self.assertTrue(exists(fp))

    # Check the contents of the file
    # NOTE(review): "U" (universal newlines) is a Python 2 era open mode
    # that recent Python 3 releases reject; revisit if this suite is ported.
    for fp, contents in zip(exp_fps, [EXP_PREP_1, EXP_PREP_2]):
        with open(fp, "U") as f:
            self.assertEqual(f.read(), contents)
def test_get_qiime_minimal_mapping_single(self):
    # Exercise _get_qiime_minimal_mapping on the unmodified prep template 1.
    # A single 's_G1_L001_sequences_MMF.txt' file whose contents equal
    # EXP_PREP is expected.
    from shutil import rmtree

    prep_template = PrepTemplate(1)

    out_dir = mkdtemp()
    # mkdtemp() never deletes what it creates, so remove the directory (and
    # the mapping file written into it) once the test has finished.
    self.addCleanup(rmtree, out_dir, ignore_errors=True)

    obs_fps = _get_qiime_minimal_mapping(prep_template, out_dir)
    exp_fps = [join(out_dir, 's_G1_L001_sequences_MMF.txt')]

    # Check that the returned list is as expected
    self.assertEqual(obs_fps, exp_fps)

    # Check that the file exists
    self.assertTrue(exists(exp_fps[0]))

    # Check the contents of the file
    # NOTE(review): "U" (universal newlines) is a Python 2 era open mode
    # that recent Python 3 releases reject; revisit if this suite is ported.
    with open(exp_fps[0], "U") as f:
        self.assertEqual(f.read(), EXP_PREP)
def test_get_qiime_minimal_mapping_single_no_run_prefix(self):
    # Exercise _get_qiime_minimal_mapping on prep template 1 after the
    # 'run_prefix' column has been dropped from its tables. A single
    # 'prep_1_MMF.txt' file whose contents equal EXP_PREP is expected.
    from shutil import rmtree

    conn_handler = SQLConnectionHandler()
    # Adjacent literals are concatenated into one statement string.
    sql = (
        "DELETE FROM qiita.prep_columns "
        "WHERE prep_template_id = 1 AND column_name = 'run_prefix'; "
        "ALTER TABLE qiita.prep_1 DROP COLUMN run_prefix"
    )
    conn_handler.execute(sql)

    prep_template = PrepTemplate(1)
    prep_template.generate_files()

    out_dir = mkdtemp()
    # mkdtemp() never deletes what it creates, so remove the directory (and
    # the mapping file written into it) once the test has finished.
    self.addCleanup(rmtree, out_dir, ignore_errors=True)

    obs_fps = _get_qiime_minimal_mapping(prep_template, out_dir)
    exp_fps = [join(out_dir, 'prep_1_MMF.txt')]

    # Check that the returned list is as expected
    self.assertEqual(obs_fps, exp_fps)

    # Check that the file exists
    self.assertTrue(exists(exp_fps[0]))

    # Check the contents of the file
    # NOTE(review): "U" (universal newlines) is a Python 2 era open mode
    # that recent Python 3 releases reject; revisit if this suite is ported.
    with open(exp_fps[0], "U") as f:
        self.assertEqual(f.read(), EXP_PREP)
def test_get_qiime_minimal_mapping_numeric_sample_ids(self):
    # Get minimal mapping file works correctly with numeric sample ids. A
    # bug was found that samples of the type <study_id>.[0-9]*0 were
    # truncated to <study_id>.[0-9]*
    from shutil import rmtree

    info = {
        "timeseries_type_id": 1,
        "metadata_complete": True,
        "mixs_compliant": True,
        "number_samples_collected": 25,
        "number_samples_promised": 28,
        "study_alias": "testing",
        "study_description": "Test description",
        "study_abstract": "Test abstract",
        "emp_person_id": StudyPerson(2),
        "principal_investigator_id": StudyPerson(3),
        "lab_person_id": StudyPerson(1)
    }
    new_study = Study.create(User('*****@*****.**'), "Test study", [1],
                             info)

    # Sample template with the numeric ids '1' and '10'; creating it is
    # expected to emit a QiitaDBWarning.
    metadata_dict = {'1': {'host_subject_id': 'NotIdentified'},
                     '10': {'host_subject_id': 'NotIdentified'}}
    metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')
    npt.assert_warns(QiitaDBWarning, SampleTemplate.create, metadata,
                     new_study)

    # Prep template over the same two sample ids; assert_warns hands back
    # the return value of PrepTemplate.create.
    metadata_dict = {'1': {'str_column': 'Value for sample 1',
                           'primer': 'GTGCCAGCMGCCGCGGTAA',
                           'barcode': 'GTCCGCAAGTTA'},
                     '10': {'str_column': 'Value for sample 1',
                            'primer': 'GTGCCAGCMGCCGCGGTAA',
                            'barcode': 'CGTAGAGCTCTC'}}
    metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')
    pt = npt.assert_warns(QiitaDBWarning, PrepTemplate.create, metadata,
                          new_study, 2)

    out_dir = mkdtemp()
    # mkdtemp() never deletes what it creates, so remove the directory (and
    # the mapping file written into it) once the test has finished.
    self.addCleanup(rmtree, out_dir, ignore_errors=True)

    obs_fps = _get_qiime_minimal_mapping(pt, out_dir)
    exp_fps = [join(out_dir, 'prep_%s_MMF.txt' % pt.id)]

    self.assertEqual(obs_fps, exp_fps)
    self.assertTrue(exists(exp_fps[0]))

    # The expected contents are a template filled in with the id of the
    # study created above.
    # NOTE(review): "U" (universal newlines) is a Python 2 era open mode
    # that recent Python 3 releases reject; revisit if this suite is ported.
    with open(exp_fps[0], 'U') as f:
        self.assertEqual(f.read(), EXP_PREP_NUM.format(new_study.id))
def test_get_qiime_minimal_mapping_multiple(self):
    # We need to create a prep template in which we have different run
    # prefix values, so we can test this case: one mapping file per run
    # prefix is expected.
    # NOTE(review): this file contains an earlier definition that is also
    # named test_get_qiime_minimal_mapping_multiple. If both live in the
    # same class, this one replaces the earlier one, which then never runs
    # -- confirm and rename one of them.
    from shutil import rmtree

    metadata_dict = {
        'SKB8.640193': {
            'center_name': 'ANL',
            'center_project_name': 'Test Project',
            'ebi_submission_accession': None,
            'EMP_status': 'EMP',
            'str_column': 'Value for sample 1',
            'linkerprimersequence': 'GTGCCAGCMGCCGCGGTAA',
            'barcodesequence': 'GTCCGCAAGTTA',
            'run_prefix': "s_G1_L001_sequences",
            'platform': 'ILLUMINA',
            'library_construction_protocol': 'AAA',
            'experiment_design_description': 'BBB'
        },
        'SKD8.640184': {
            'center_name': 'ANL',
            'center_project_name': 'Test Project',
            'ebi_submission_accession': None,
            'EMP_status': 'EMP',
            'str_column': 'Value for sample 2',
            'linkerprimersequence': 'GTGCCAGCMGCCGCGGTAA',
            'barcodesequence': 'CGTAGAGCTCTC',
            'run_prefix': "s_G1_L001_sequences",
            'platform': 'ILLUMINA',
            'library_construction_protocol': 'AAA',
            'experiment_design_description': 'BBB'
        },
        'SKB7.640196': {
            'center_name': 'ANL',
            'center_project_name': 'Test Project',
            'ebi_submission_accession': None,
            'EMP_status': 'EMP',
            'str_column': 'Value for sample 3',
            'linkerprimersequence': 'GTGCCAGCMGCCGCGGTAA',
            'barcodesequence': 'CCTCTGAGAGCT',
            'run_prefix': "s_G1_L002_sequences",
            'platform': 'ILLUMINA',
            'library_construction_protocol': 'AAA',
            'experiment_design_description': 'BBB'
        }
    }
    md_template = pd.DataFrame.from_dict(metadata_dict, orient='index')
    prep_template = PrepTemplate.create(md_template, RawData(2), Study(1),
                                        '16S')

    out_dir = mkdtemp()
    # mkdtemp() never deletes what it creates, so remove the directory (and
    # the mapping files written into it) once the test has finished.
    self.addCleanup(rmtree, out_dir, ignore_errors=True)

    # Both lists are sorted so the comparison does not depend on the order
    # in which the mapping files are returned.
    obs_fps = sorted(_get_qiime_minimal_mapping(prep_template, out_dir))
    exp_fps = sorted([
        join(out_dir, 's_G1_L001_sequences_MMF.txt'),
        join(out_dir, 's_G1_L002_sequences_MMF.txt')
    ])

    # Check that the returned list is as expected
    self.assertEqual(obs_fps, exp_fps)

    # Check that the file exists
    for fp in exp_fps:
        self.assertTrue(exists(fp))

    # Check the contents of the file
    # NOTE(review): "U" (universal newlines) is a Python 2 era open mode
    # that recent Python 3 releases reject; revisit if this suite is ported.
    for fp, contents in zip(exp_fps, [EXP_PREP_1, EXP_PREP_2]):
        with open(fp, "U") as f:
            self.assertEqual(f.read(), contents)