def test_validate_fasta(self):
    """validate_fasta writes the expected report log.

    validate_fasta is run twice on the sample fasta and mapping files:
    first with only the required arguments, then with the tree,
    sequence-length and SampleID options all switched on.  After each run
    the "<fasta filename>_report.log" file in the output directory is read
    back and every line after the first is compared with the expected
    report text.
    """
    # Both runs write to the same report path, so compute it once.
    expected_log_fp = join(self.output_dir,
                           split(self.sample_fasta_fp)[1] + "_report.log")

    validate_fasta(self.sample_fasta_fp, self.sample_mapping_fp,
                   self.output_dir)

    # Use a context manager so the log handle is closed even if reading
    # fails; the first line of the log is not part of the comparison.
    with open(expected_log_fp, "U") as log_f:
        actual_log_lines = [line.strip() for line in log_f][1:]

    # NOTE(review): the expected text is reproduced exactly as found.  Its
    # entries are separated by single spaces rather than newlines, so
    # split('\n') yields one element here.  The line breaks were presumably
    # lost at some point -- confirm against the real report format.
    expected_log_lines = (
        "Percent duplicate labels: 0.000 "
        "Percent QIIME-incompatible fasta labels: 0.000 "
        "Percent of labels that fail to map to SampleIDs: 0.000 "
        "Percent of sequences with invalid characters: 0.000 "
        "Percent of sequences with barcodes detected: 0.000 "
        "Percent of sequences with barcodes detected at the beginning of "
        "the sequence: 0.000 "
        "Percent of sequences with primers detected: 0.000").split('\n')

    self.assertEqual(actual_log_lines, expected_log_lines)

    # Check with all optional values included
    validate_fasta(self.sample_fasta_fp, self.sample_mapping_fp,
                   self.output_dir,
                   tree_fp=self.sample_tree_5tips_fp,
                   tree_subset=True,
                   tree_exact_match=True,
                   same_seq_lens=True,
                   all_ids_found=True)

    with open(expected_log_fp, "U") as log_f:
        actual_log_lines = [line.strip() for line in log_f][1:]

    # NOTE(review): as above, reproduced exactly as found.  Only one
    # newline survives (after "tree tips. "), and the element before it
    # ends in a space that a stripped log line can never match -- confirm
    # the intended line breaks.
    expected_log_lines = (
        "Percent duplicate labels: 0.000 "
        "Percent QIIME-incompatible fasta labels: 0.000 "
        "Percent of labels that fail to map to SampleIDs: 0.000 "
        "Percent of sequences with invalid characters: 0.000 "
        "Percent of sequences with barcodes detected: 0.000 "
        "Percent of sequences with barcodes detected at the beginning of "
        "the sequence: 0.000 "
        "Percent of sequences with primers detected: 0.000 "
        "Sequence lengths report "
        "Counts of sequences, followed by their sequence lengths: "
        "1\t35 "
        "1\t32 "
        "1\t27 "
        "Sample ID in fasta sequences report "
        "The following SampleIDs were not found: "
        "seq2 "
        "Fasta label subset in tree tips report "
        "All fasta labels were a subset of tree tips. "
        "Fasta label/tree tip exact match report "
        "All fasta labels found in tree tips. \n"
        "The following tips were not in fasta labels: "
        "seq2 "
        "seq5 "
        "seq4").split('\n')

    self.assertEqual(actual_log_lines, expected_log_lines)
def test_validate_fasta_suppress_primers_barcodes(self):
    """validate_fasta with the primer/barcode checks suppressed.

    Using the mapping file with errors, validate_fasta is expected to raise
    ValueError when neither check is suppressed and when only one of them
    is suppressed.  With both suppressed the call must succeed, and the
    report log (minus its first line) is compared with the expected text.
    """
    # Should raise errors unless both the primer and the barcode checks
    # are suppressed.
    self.assertRaises(ValueError, validate_fasta,
                      self.sample_fasta_fp,
                      self.sample_mapping_file_errors_fp,
                      self.output_dir)
    self.assertRaises(ValueError, validate_fasta,
                      self.sample_fasta_fp,
                      self.sample_mapping_file_errors_fp,
                      self.output_dir,
                      suppress_primer_checks=True)
    self.assertRaises(ValueError, validate_fasta,
                      self.sample_fasta_fp,
                      self.sample_mapping_file_errors_fp,
                      self.output_dir,
                      suppress_barcode_checks=True)

    # No errors when both suppressed
    validate_fasta(self.sample_fasta_fp,
                   self.sample_mapping_file_errors_fp,
                   self.output_dir,
                   suppress_primer_checks=True,
                   suppress_barcode_checks=True)

    expected_log_fp = join(self.output_dir,
                           split(self.sample_fasta_fp)[1] + "_report.log")

    # Context manager closes the log handle instead of leaking it; the
    # first line of the log is not part of the comparison.
    with open(expected_log_fp, "U") as log_f:
        actual_log_lines = [line.strip() for line in log_f][1:]

    # NOTE(review): the expected text is reproduced exactly as found.  Its
    # entries are separated by single spaces rather than newlines, so
    # split('\n') yields one element.  The line breaks were presumably
    # lost at some point -- confirm against the real report format.
    expected_log_lines = (
        "Percent duplicate labels: 0.000 "
        "Percent QIIME-incompatible fasta labels: 0.000 "
        "Percent of labels that fail to map to SampleIDs: 0.000 "
        "Percent of sequences with invalid characters: 0.000 "
        "Percent of sequences with barcodes detected: 0.000 "
        "Percent of sequences with barcodes detected at the beginning of "
        "the sequence: 0.000 "
        "Percent of sequences with primers detected: 0.000").split('\n')

    self.assertEqual(actual_log_lines, expected_log_lines)
def main():
    """Command-line entry point: check the inputs, then run validate_fasta.

    Options are read via parse_command_line_parameters(**script_info).  The
    output directory is created first; the mapping file (and the tree file,
    when one is given) is then opened once to confirm it is readable before
    validate_fasta is called.

    Raises:
        IOError: if the mapping file, or the supplied tree file, cannot be
            opened.
        ValueError: if tree_subset or tree_exact_match is requested without
            a tree filepath.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    input_fasta_fp = opts.input_fasta_fp
    mapping_fp = opts.mapping_fp
    output_dir = opts.output_dir
    tree_fp = opts.tree_fp
    tree_subset = opts.tree_subset
    tree_exact_match = opts.tree_exact_match
    same_seq_lens = opts.same_seq_lens
    all_ids_found = opts.all_ids_found

    create_dir(output_dir)

    # Test optional filepaths and requirements.  Each file is opened only
    # to prove it is readable; the context manager closes it immediately.
    try:
        with open(mapping_fp, "U"):
            pass
    except IOError:
        # Call-form raise is valid on both Python 2 and Python 3.
        raise IOError("Unable to open mapping file, please check "
                      "filepath and read permissions.")

    if tree_fp:
        try:
            with open(tree_fp, "U"):
                pass
        except IOError:
            raise IOError("Unable to open provided tree filepath, please "
                          "check filepath and permissions.")

    # The tree-based reports cannot be produced without a tree.
    if (tree_subset or tree_exact_match) and not tree_fp:
        raise ValueError('Must provide tree filepath if -s or -e options '
                         'are enabled.')

    validate_fasta(input_fasta_fp, mapping_fp, output_dir, tree_fp,
                   tree_subset, tree_exact_match, same_seq_lens,
                   all_ids_found, opts.suppress_barcode_checks,
                   opts.suppress_primer_checks)
def main():
    """Validate the command-line inputs and hand them to validate_fasta.

    Options come from parse_command_line_parameters(**script_info).  After
    creating the output directory, the mapping file and (if supplied) the
    tree file are each opened once as a readability check, and the tree
    options are checked for consistency before validate_fasta runs.

    Raises:
        IOError: if the mapping file, or the supplied tree file, cannot be
            opened.
        ValueError: if tree_subset or tree_exact_match is requested without
            a tree filepath.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    input_fasta_fp = opts.input_fasta_fp
    mapping_fp = opts.mapping_fp
    output_dir = opts.output_dir
    tree_fp = opts.tree_fp
    tree_subset = opts.tree_subset
    tree_exact_match = opts.tree_exact_match
    same_seq_lens = opts.same_seq_lens
    all_ids_found = opts.all_ids_found

    create_dir(output_dir)

    # Test optional filepaths and requirements.  The files are opened only
    # to confirm they are readable, so they are closed again right away.
    try:
        with open(mapping_fp, "U"):
            pass
    except IOError:
        raise IOError("Unable to open mapping file, please check "
                      "filepath and read permissions.")

    if tree_fp:
        try:
            with open(tree_fp, "U"):
                pass
        except IOError:
            raise IOError("Unable to open provided tree filepath, please "
                          "check filepath and permissions.")

    # Tree-dependent options are meaningless without a tree file.
    needs_tree = tree_subset or tree_exact_match
    if needs_tree and not tree_fp:
        raise ValueError('Must provide tree filepath if -s or -e options '
                         'are enabled.')

    validate_fasta(
        input_fasta_fp,
        mapping_fp,
        output_dir,
        tree_fp,
        tree_subset,
        tree_exact_match,
        same_seq_lens,
        all_ids_found,
        opts.suppress_barcode_checks,
        opts.suppress_primer_checks)
def test_validate_fasta_with_invalid(self):
    """validate_fasta reports the problems in the invalid sample fasta.

    Runs validate_fasta on sample_fasta_invalid_fp with the sample mapping
    file, then compares the report log (minus its first line) with the
    expected text.
    """
    validate_fasta(self.sample_fasta_invalid_fp, self.sample_mapping_fp,
                   self.output_dir)

    expected_log_fp = join(
        self.output_dir,
        split(self.sample_fasta_invalid_fp)[1] + "_report.log")

    # Context manager closes the log handle instead of leaking it; the
    # first line of the log is not part of the comparison.
    with open(expected_log_fp, "U") as log_f:
        actual_log_lines = [line.strip() for line in log_f][1:]

    # NOTE(review): the expected text is reproduced exactly as found.  Its
    # entries are separated by single spaces rather than newlines, so
    # split('\n') yields one element.  The line breaks were presumably
    # lost at some point -- confirm against the real report format.
    expected_log_lines = (
        "Percent duplicate labels: 0.250 "
        "Percent QIIME-incompatible fasta labels: 0.500 "
        "Percent of labels that fail to map to SampleIDs: 0.750 "
        "Percent of sequences with invalid characters: 0.500 "
        "Percent of sequences with barcodes detected: 0.250 "
        "Percent of sequences with barcodes detected at the beginning of "
        "the sequence: 0.000 "
        "Percent of sequences with primers detected: 0.250").split('\n')

    self.assertEqual(actual_log_lines, expected_log_lines)
def test_validate_fasta_with_invalid(self):
    """Report log contents for a fasta file containing invalid records.

    validate_fasta is called with sample_fasta_invalid_fp and the sample
    mapping file; every line of the resulting report log after the first
    is then checked against the expected text.
    """
    fasta_fp = self.sample_fasta_invalid_fp
    validate_fasta(fasta_fp, self.sample_mapping_fp, self.output_dir)

    report_name = split(fasta_fp)[1] + "_report.log"
    expected_log_fp = join(self.output_dir, report_name)

    # Read inside a with-block so the handle is always closed; the first
    # line of the log is dropped before comparing.
    with open(expected_log_fp, "U") as log_f:
        stripped_lines = [line.strip() for line in log_f]
    actual_log_lines = stripped_lines[1:]

    # NOTE(review): the expected text is reproduced exactly as found.  Its
    # entries are separated by single spaces rather than newlines, so
    # split('\n') yields one element.  The line breaks were presumably
    # lost at some point -- confirm against the real report format.
    expected_log_lines = (
        "Percent duplicate labels: 0.250 "
        "Percent QIIME-incompatible fasta labels: 0.500 "
        "Percent of labels that fail to map to SampleIDs: 0.750 "
        "Percent of sequences with invalid characters: 0.500 "
        "Percent of sequences with barcodes detected: 0.250 "
        "Percent of sequences with barcodes detected at the beginning of "
        "the sequence: 0.000 "
        "Percent of sequences with primers detected: 0.250").split('\n')

    self.assertEqual(actual_log_lines, expected_log_lines)