def test_end_sortsAndIndexes(self):
    """Check the files and read order WriteReadHandler leaves after end().

    Two reads are handled out of positional order (start 20, then start 10).
    After ``end()`` the output directory must hold exactly ``output.bam`` and
    ``output.bam.bai``, and fetching from the output must yield ``read2``
    before ``read1``.
    """
    #pylint: disable=no-member
    with TempDirectory() as input_dir, TempDirectory() as output_dir:
        input_bam_filename = os.path.join(input_dir.path, "input.bam")
        make_bam_file(input_bam_filename, [build_read()])
        output_bam_filename = os.path.join(output_dir.path, "output.bam")
        mock_log = MockLog()
        handler = readhandler.WriteReadHandler(input_bam_filename,
                                               output_bam_filename,
                                               log_method=mock_log.log)
        read1 = build_read(query_name="read1",
                           reference_id=0,
                           reference_start=20)
        read2 = build_read(query_name="read2",
                           reference_id=0,
                           reference_start=10)

        handler.begin()
        handler.handle(read1, None, None)
        handler.handle(read2, None, None)
        handler.end()

        actual_files = sorted(os.listdir(output_dir.path))
        self.assertEqual(["output.bam", "output.bam.bai"], actual_files)

        actual_bam = pysam.AlignmentFile(output_bam_filename, "rb")
        try:
            actual_reads = list(actual_bam.fetch())
        finally:
            # Always release the handle, even if fetch() raises, so the
            # temporary directory can be removed afterwards.
            actual_bam.close()

        self.assertEqual(2, len(actual_reads))
        self.assertEqual("read2", actual_reads[0].query_name)
        self.assertEqual("read1", actual_reads[1].query_name)
def test_main(self):
    """Run clipper.main end to end on a small BAM and a primer manifest.

    The input holds one read pair (readA) on reference 0 and one extra read
    (readB) on reference 1. The output is expected to contain only the two
    readA records, with the CIGARs and positions matched by the regexes
    below.
    """
    primer_file_content = \
'''Customer TargetID|Chr|Sense Start|Antisense Start|Sense Sequence|Antisense Sequence
primer1|1|101|200|AAGG|CCTT
primer2|2|501|600|CGCG|ATAT
'''.replace("|", "\t")
    CIGAR_10M = ((0, 10), )
    readA1 = build_read(query_name="readA",
                        query_sequence="AGCTTAGCTA",
                        flag=99,
                        reference_id=0,
                        reference_start=100,
                        cigar=CIGAR_10M,
                        next_reference_id=0,
                        next_reference_start=190,
                        template_length=80)
    readA2 = build_read(query_name="readA",
                        query_sequence="AGCTTAGCTA",
                        flag=147,
                        reference_id=0,
                        reference_start=190,
                        cigar=CIGAR_10M,
                        next_reference_id=0,
                        next_reference_start=100,
                        template_length=80)
    readB1 = build_read(query_name="readB",
                        query_sequence="AGCTTAGCTA",
                        flag=0,
                        reference_id=1,
                        reference_start=242,
                        cigar=CIGAR_10M,
                        next_reference_id=0,
                        next_reference_start=0,
                        template_length=0)
    with TempDirectory() as input_dir, TempDirectory() as output_dir:
        input_bam_filename = os.path.join(input_dir.path, "input.bam")
        output_bam_filename = os.path.join(output_dir.path, "output.bam")
        input_primers_filename = self._create_file(input_dir.path,
                                                   'primers.txt',
                                                   primer_file_content)
        make_bam_file(input_bam_filename, [readA1, readA2, readB1])

        clipper.main(["katana",
                      input_primers_filename,
                      input_bam_filename,
                      output_bam_filename])

        actual = self._bam_to_sam(output_bam_filename)

    # Check the record count first: a short result then fails with a clear
    # assertion message rather than an IndexError on actual[0]/actual[1].
    self.assertEqual(2, len(actual))
    # NOTE(review): assertRegexpMatches is a deprecated alias on Python 3;
    # kept here on the assumption that Python 2.7 is still supported.
    self.assertRegexpMatches(actual[0], "readA.*chr1.*105.*4S6M.*191")
    self.assertRegexpMatches(actual[1], "readA.*chr1.*191.*6M4S.*105")
def test_check_input_bam_barcoded_okAtThreshold(self):
    """The barcode check passes silently for this mostly soft-clipped input.

    Of the five read pairs in the fixture, only readNameA1 has plain ``8M``
    CIGARs on both mates; the other four pairs carry a 3-base soft clip
    (``3S5M`` / ``5M3S``). With ``force=False`` the validator must neither
    raise nor log anything.
    """
    sam_contents = \
'''@HD|VN:1.4|GO:none|SO:coordinate
@SQ|SN:chr10|LN:135534747
readNameA1|99|chr10|100|20|8M|=|300|200|NNNAAAAA|>>>>>>>>
readNameA1|147|chr10|100|20|8M|=|300|200|AAAAANNN|>>>>>>>>
readNameA2|99|chr10|100|20|3S5M|=|300|200|NNNAAAAA|>>>>>>>>
readNameA2|147|chr10|100|20|5M3S|=|300|200|AAAAANNN|>>>>>>>>
readNameA3|99|chr10|100|20|3S5M|=|300|200|NNNAAAAA|>>>>>>>>
readNameA3|147|chr10|100|20|5M3S|=|300|200|AAAAANNN|>>>>>>>>
readNameA4|99|chr10|100|20|3S5M|=|300|200|NNNAAAAA|>>>>>>>>
readNameA4|147|chr10|100|20|5M3S|=|300|200|AAAAANNN|>>>>>>>>
readNameA5|99|chr10|100|20|3S5M|=|300|200|NNNAAAAA|>>>>>>>>
readNameA5|147|chr10|100|20|5M3S|=|300|200|AAAAANNN|>>>>>>>>
'''
    sam_contents = sam_contents.replace("|", "\t")
    with TempDirectory() as tmp_dir:
        input_bam_path = samtools_test.create_bam(tmp_dir.path,
                                                  "input.sam",
                                                  sam_contents,
                                                  index=True)
        args = Namespace(input_bam=input_bam_path, force=False)
        validator._check_input_bam_barcoded(args, self.mock_logger)

    self.ok()
    # assertEqual rather than the deprecated assertEquals alias, matching
    # the sibling validator tests.
    self.assertEqual(0, len(self.mock_logger._log_calls))
def test_close_sortsAndIndexes(self):
    """Aligns written out of order come back position-ordered after close().

    Three aligns (starts 100, 200, 300) are written as align3, align1,
    align2; fetching from the closed file must return align1, align2,
    align3.
    """
    header = {'HD': {'VN': '1.0'},
              'SQ': [{'LN': 1575, 'SN': 'chr1'},
                     {'LN': 1584, 'SN': 'chr2'}]}
    first = ConnorAlign(mock_align(query_name='align1', reference_start=100))
    second = ConnorAlign(mock_align(query_name='align2', reference_start=200))
    third = ConnorAlign(mock_align(query_name='align3', reference_start=300))
    family_tag = BamTag('X1', 'Z', 'desc',
                        get_value=lambda family, pair, align: family)
    name_tag = BamTag('X2', 'Z', 'desc',
                      get_value=lambda family, pair, align: align.query_name)

    with TempDirectory() as tmp_dir:
        bam_path = os.path.join(tmp_dir.path, 'destination.bam')
        writer = samtools.AlignWriter(header, bam_path, [family_tag, name_tag])

        # Deliberately written out of positional order.
        write_order = [('familyC', third),
                       ('familyA', first),
                       ('familyB', second)]
        for family, align in write_order:
            writer.write(family, None, align)
        writer.close()

        bamfile = samtools.alignment_file(bam_path, 'rb')
        actual_aligns = list(bamfile.fetch())
        bamfile.close()

    self.assertEqual(3, len(actual_aligns))
    self.assertEqual('align1', actual_aligns[0].query_name)
    self.assertEqual('align2', actual_aligns[1].query_name)
    self.assertEqual('align3', actual_aligns[2].query_name)
def test_write_removesTagsWhenValueIsNone(self):
    """A tag whose get_value yields None is absent from the written align.

    The align starts with an ``X1`` tag set; the writer is given an ``X1``
    BamTag that always returns None. The single align read back must carry
    no tags at all.
    """
    header = {'HD': {'VN': '1.0'},
              'SQ': [{'LN': 1575, 'SN': 'chr1'},
                     {'LN': 1584, 'SN': 'chr2'}]}
    align1 = ConnorAlign(mock_align(query_name='align1'))
    align1.set_tag('X1', 'No', 'Z')
    none_tag = BamTag('X1', 'Z', 'desc',
                      get_value=lambda family, pair, align: None)

    with TempDirectory() as tmp_dir:
        bam_path = os.path.join(tmp_dir.path, 'destination.bam')
        writer = samtools.AlignWriter(header, bam_path, [none_tag])
        writer.write('familyA', None, align1)
        writer.close()

        bamfile = samtools.alignment_file(bam_path, 'rb')
        actual_aligns = list(bamfile.fetch())
        bamfile.close()

    # Collect every tag found, keyed by (query name, tag name).
    fix_type = AlignWriterTest.fix_pysam_inconsistent_tag_type
    align_tags = {}
    for found in actual_aligns:
        for name, value, value_type in found.get_tags(with_value_type=True):
            rendered = "{}:{}:{}".format(name, fix_type(value_type), value)
            align_tags[(found.query_name, name)] = rendered

    self.assertEqual(1, len(actual_aligns))
    self.assertEqual(0, len(align_tags))
def test_check_input_bam_barcoded_rightUnbarcodedRaisesUsageError(self):
    """The barcode check raises UsageError for this fixture when not forced.

    Compared with the passing fixture, the 147-flag mate of readNameA2 here
    has a plain ``8M`` CIGAR instead of ``5M3S``.
    """
    sam_contents = '''@HD|VN:1.4|GO:none|SO:coordinate
@SQ|SN:chr10|LN:135534747
readNameA1|99|chr10|100|20|8M|=|300|200|NNNAAAAA|>>>>>>>>
readNameA1|147|chr10|100|20|8M|=|300|200|AAAAANNN|>>>>>>>>
readNameA2|99|chr10|100|20|3S5M|=|300|200|NNNAAAAA|>>>>>>>>
readNameA2|147|chr10|100|20|8M|=|300|200|AAAAANNN|>>>>>>>>
readNameA3|99|chr10|100|20|3S5M|=|300|200|NNNAAAAA|>>>>>>>>
readNameA3|147|chr10|100|20|5M3S|=|300|200|AAAAANNN|>>>>>>>>
readNameA4|99|chr10|100|20|3S5M|=|300|200|NNNAAAAA|>>>>>>>>
readNameA4|147|chr10|100|20|5M3S|=|300|200|AAAAANNN|>>>>>>>>
readNameA5|99|chr10|100|20|3S5M|=|300|200|NNNAAAAA|>>>>>>>>
readNameA5|147|chr10|100|20|5M3S|=|300|200|AAAAANNN|>>>>>>>>
'''.replace("|", "\t")
    expected_message = r'\[.*input.bam\] reads do not appear to have barcodes'
    with TempDirectory() as tmp_dir:
        bam_path = samtools_test.create_bam(tmp_dir.path,
                                            "input.sam",
                                            sam_contents,
                                            index=True)
        args = Namespace(input_bam=bam_path, force=False)
        with self.assertRaisesRegexp(utils.UsageError, expected_message):
            validator._check_input_bam_barcoded(args)
def test_check_input_bam_exists_raisesUsageError(self):
    """A path with no file behind it is rejected with a UsageError."""
    expected_message = r'\[.*input.bam\] does not exist'
    with TempDirectory() as tmp_dir:
        # Nothing is written to this path, so the file is absent.
        missing_bam = os.path.join(tmp_dir.path, 'input.bam')
        args = Namespace(input_bam=missing_bam)
        with self.assertRaisesRegexp(utils.UsageError, expected_message):
            validator._check_input_bam_exists(args)
def test_check_input_bam_exists_ok(self):
    """The existence check does not raise when the input file is present."""
    with TempDirectory() as tmp_dir:
        # Only presence is checked here, so the file content is arbitrary.
        tmp_dir.write('input.bam', b'foo')
        present_bam = os.path.join(tmp_dir.path, 'input.bam')
        validator._check_input_bam_exists(Namespace(input_bam=present_bam))
        self.ok()
def test_check_input_bam_barcoded_warnIfForced(self):
    """With force=True the barcode check logs one warning instead of raising.

    In this fixture both readNameA1 and readNameA2 have plain ``8M`` CIGARs
    on each mate; the remaining three pairs are soft-clipped.
    """
    sam_contents = '''@HD|VN:1.4|GO:none|SO:coordinate
@SQ|SN:chr10|LN:135534747
readNameA1|99|chr10|100|20|8M|=|300|200|NNNAAAAA|>>>>>>>>
readNameA1|147|chr10|100|20|8M|=|300|200|AAAAANNN|>>>>>>>>
readNameA2|99|chr10|100|20|8M|=|300|200|NNNAAAAA|>>>>>>>>
readNameA2|147|chr10|100|20|8M|=|300|200|AAAAANNN|>>>>>>>>
readNameA3|99|chr10|100|20|3S5M|=|300|200|NNNAAAAA|>>>>>>>>
readNameA3|147|chr10|100|20|5M3S|=|300|200|AAAAANNN|>>>>>>>>
readNameA4|99|chr10|100|20|3S5M|=|300|200|NNNAAAAA|>>>>>>>>
readNameA4|147|chr10|100|20|5M3S|=|300|200|AAAAANNN|>>>>>>>>
readNameA5|99|chr10|100|20|3S5M|=|300|200|NNNAAAAA|>>>>>>>>
readNameA5|147|chr10|100|20|5M3S|=|300|200|AAAAANNN|>>>>>>>>
'''.replace("|", "\t")
    expected_warning = \
        r'\[.*input.bam\] reads do not appear to have barcodes.*forcing'
    with TempDirectory() as tmp_dir:
        bam_path = samtools_test.create_bam(tmp_dir.path,
                                            "input.sam",
                                            sam_contents,
                                            index=True)
        forced_args = Namespace(input_bam=bam_path, force=True)
        validator._check_input_bam_barcoded(forced_args, self.mock_logger)

    logged_warnings = self.mock_logger._log_calls['WARNING']
    self.assertEqual(1, len(logged_warnings))
    self.assertRegexpMatches(logged_warnings[0], expected_warning)
def test_check_input_bam_valid_raisesUsageError(self):
    """A file holding only the bytes ``foo`` is rejected as not a valid BAM."""
    expected_message = r'\[.*input.bam\] not a valid BAM'
    with TempDirectory() as tmp_dir:
        tmp_dir.write('input.bam', b'foo')
        bogus_bam = os.path.join(tmp_dir.path, 'input.bam')
        args = Namespace(input_bam=bogus_bam)
        with self.assertRaisesRegexp(utils.UsageError, expected_message):
            validator._check_input_bam_valid(args)
def test_init_defaultToNoTags(self):
    """An AlignWriter built without a tag list ends up with ``_tags == []``."""
    header = {'HD': {'VN': '1.0'},
              'SQ': [{'LN': 1575, 'SN': 'chr1'},
                     {'LN': 1584, 'SN': 'chr2'}]}
    with TempDirectory() as tmp_dir:
        destination = os.path.join(tmp_dir.path, "destination.bam")
        writer = samtools.AlignWriter(header, destination)
        writer.close()
        self.assertEqual([], writer._tags)
def test_check_overwrite_output_ok(self):
    """The overwrite check is silent when neither output file exists yet."""
    with TempDirectory() as tmp_dir:
        # Neither path is created, so there is nothing to overwrite.
        args = Namespace(
            output_bam=os.path.join(tmp_dir.path, 'deduped.bam'),
            annotated_output_bam=os.path.join(tmp_dir.path, 'annotated.bam'),
            force=False)
        validator._check_overwrite_output(args, self.mock_logger)
        self.ok()
        self.assertEqual(0, len(self.mock_logger._log_calls))
def test_sort_and_index_bam(self):
    """sort_and_index_bam orders aligns by position for both path styles.

    The same unsorted fixture is processed twice: once through the path
    returned by ``create_bam`` and once through a path relative to the
    current working directory (``tmp/<name>``). Each time the aligns read
    back must be in ascending position order.
    """
    sam_contents = \
'''@HD|VN:1.4|GO:none|SO:coordinate
@SQ|SN:chr10|LN:135534747
readNameB1|147|chr10|400|0|5M|=|200|100|CCCCC|>>>>>
readNameA1|147|chr10|300|0|5M|=|100|100|AAAAA|>>>>>
readNameA1|99|chr10|100|0|5M|=|300|200|AAAAA|>>>>>
readNameB1|99|chr10|200|0|5M|=|400|200|CCCCC|>>>>>
readNameA2|147|chr10|300|0|5M|=|100|100|AAAAA|>>>>>
readNameA2|99|chr10|100|0|5M|=|300|200|AAAAA|>>>>>
'''.replace("|", "\t")
    expected_aligns = [("readNameA1", 100),
                       ("readNameA2", 100),
                       ("readNameB1", 200),
                       ("readNameA1", 300),
                       ("readNameA2", 300),
                       ("readNameB1", 400)]

    def read_aligns(bam_path):
        """Return (query_name, 1-based start) for each align in bam_path."""
        bamfile = samtools.alignment_file(bam_path, "rb")
        try:
            return [(a.query_name, a.reference_start + 1)
                    for a in bamfile.fetch()]
        finally:
            # The original never closed this handle; an open file can block
            # removal of the temporary directory on some platforms.
            bamfile.close()

    with TempDirectory() as tmp_dir:
        bam = create_bam(tmp_dir.path,
                         "input.sam",
                         sam_contents,
                         index=False)
        samtools.sort_and_index_bam(bam)
        aligns = read_aligns(bam)
        self.assertEqual(6, len(aligns))
        self.assertEqual(expected_aligns, aligns)

        # Repeat using a path relative to the working directory.
        original_dir = os.getcwd()
        try:
            os.chdir(tmp_dir.path)
            os.mkdir("tmp")
            bam = create_bam(os.path.join(tmp_dir.path, "tmp"),
                             "input.sam",
                             sam_contents,
                             index=False)
            bam_filename = os.path.basename(bam)

            samtools.sort_and_index_bam(os.path.join("tmp", bam_filename))

            aligns = read_aligns(bam)
            self.assertEqual(6, len(aligns))
            self.assertEqual(expected_aligns, aligns)
        finally:
            # Restore the working directory so later tests are unaffected
            # and the temporary directory is no longer the cwd at cleanup.
            os.chdir(original_dir)
def test_check_input_bam_indexed_ok(self):
    """The index check does not raise for a BAM created with index=True."""
    sam_contents = '''@HD|VN:1.4|GO:none|SO:coordinate
@SQ|SN:chr10|LN:135534747
readNameA1|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>'''
    sam_contents = sam_contents.replace("|", "\t")
    with TempDirectory() as tmp_dir:
        indexed_bam = samtools_test.create_bam(tmp_dir.path,
                                               "input.sam",
                                               sam_contents,
                                               index=True)
        validator._check_input_bam_indexed(Namespace(input_bam=indexed_bam))
        self.ok()
def test_check_overwrite_output_raisesUsageErrorIfAnnotatedPresent(self):
    """An existing annotated output raises UsageError when force is False.

    Only ``annotated.bam`` is created beforehand; the check must raise and
    must not log anything.
    """
    expected_message = r'\[.*annotated.bam\] exist.*force'
    with TempDirectory() as tmp_dir:
        tmp_dir.write('annotated.bam', b'foo')
        args = Namespace(
            output_bam=os.path.join(tmp_dir.path, 'deduped.bam'),
            annotated_output_bam=os.path.join(tmp_dir.path, 'annotated.bam'),
            force=False)
        with self.assertRaisesRegexp(utils.UsageError, expected_message):
            validator._check_overwrite_output(args, self.mock_logger)
        self.assertEqual(0, len(self.mock_logger._log_calls))
def test_check_overwrite_output_warnIfForced(self):
    """With force=True, existing outputs produce one warning naming both."""
    expected_warning = r'\[.*deduped.bam, .*annotated.bam\] exist.*forcing'
    with TempDirectory() as tmp_dir:
        # Both output files already exist.
        tmp_dir.write('deduped.bam', b'foo')
        tmp_dir.write('annotated.bam', b'bar')
        forced_args = Namespace(
            output_bam=os.path.join(tmp_dir.path, 'deduped.bam'),
            annotated_output_bam=os.path.join(tmp_dir.path, 'annotated.bam'),
            force=True)
        validator._check_overwrite_output(forced_args, self.mock_logger)

    logged_warnings = self.mock_logger._log_calls['WARNING']
    self.assertEqual(1, len(logged_warnings))
    self.assertRegexpMatches(logged_warnings[0], expected_warning)
def test_check_input_bam_indexed_raisesUsageError(self):
    """A BAM created with index=False is rejected as not indexed."""
    sam_contents = '''@HD|VN:1.4|GO:none|SO:coordinate
@SQ|SN:chr10|LN:135534747
readNameA1|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>'''
    sam_contents = sam_contents.replace("|", "\t")
    expected_message = r'\[.*input.bam\] is not indexed'
    with TempDirectory() as tmp_dir:
        unindexed_bam = samtools_test.create_bam(tmp_dir.path,
                                                 "input.sam",
                                                 sam_contents,
                                                 index=False)
        args = Namespace(input_bam=unindexed_bam)
        with self.assertRaisesRegexp(utils.UsageError, expected_message):
            validator._check_input_bam_indexed(args)
def test_check_input_bam_not_empty_raiseUsageError(self):
    """A BAM built from a header-only SAM is rejected as empty."""
    # Header lines only: no alignment records follow.
    sam_contents = '''@HD|VN:1.4|GO:none|SO:coordinate
@SQ|SN:chr10|LN:135534747
'''
    sam_contents = sam_contents.replace("|", "\t")
    expected_message = r'\[.*input.bam\] is empty'
    with TempDirectory() as tmp_dir:
        empty_bam = samtools_test.create_bam(tmp_dir.path,
                                             "input.sam",
                                             sam_contents,
                                             index=True)
        args = Namespace(input_bam=empty_bam, force=False)
        with self.assertRaisesRegexp(utils.UsageError, expected_message):
            validator._check_input_bam_not_empty(args)
def test_check_input_bam_not_deduped_ok(self):
    """The already-deduped check is silent when the @PG program is ``bwa``."""
    sam_contents = '''@HD|VN:1.4|GO:none|SO:coordinate
@SQ|SN:chr10|LN:135534747
@PG|ID:foo|PN:bwa
readNameA1|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>'''
    sam_contents = sam_contents.replace("|", "\t")
    with TempDirectory() as tmp_dir:
        bam_path = samtools_test.create_bam(tmp_dir.path,
                                            "input.sam",
                                            sam_contents,
                                            index=True)
        forced_args = Namespace(input_bam=bam_path, force=True)
        validator._check_input_bam_not_deduped(forced_args, self.mock_logger)
        self.ok()
        self.assertEqual(0, len(self.mock_logger._log_calls))
def test_check_input_bam_paired_raisesUsageError(self):
    """A BAM whose only read has flag 16 fails the paired-read check."""
    sam_template = '''@HD|VN:1.4|GO:none|SO:coordinate
@SQ|SN:chr10|LN:135534747
readNameA1|{flag}|chr10|100|20|5M|=|300|200|AAAAA|>>>>>'''
    sam_contents = sam_template.format(flag='16')
    sam_contents = sam_contents.replace("|", "\t")
    expected_message = \
        r'\[.*input.bam\] does not appear to contain paired reads'
    with TempDirectory() as tmp_dir:
        bam_path = samtools_test.create_bam(tmp_dir.path,
                                            "input.sam",
                                            sam_contents,
                                            index=True)
        args = Namespace(input_bam=bam_path, force=False)
        with self.assertRaisesRegexp(utils.UsageError, expected_message):
            validator._check_input_bam_paired(args)
def test_close_logs(self):
    """close(log=...) emits one INFO line that mentions the output file."""
    header = {'HD': {'VN': '1.0'},
              'SQ': [{'LN': 1575, 'SN': 'chr1'},
                     {'LN': 1584, 'SN': 'chr2'}]}
    only_align = ConnorAlign(mock_align(query_name='align1',
                                        reference_start=100))
    with TempDirectory() as tmp_dir:
        destination = os.path.join(tmp_dir.path, 'destination.bam')
        writer = samtools.AlignWriter(header, destination, [])
        writer.write('familyA', None, only_align)
        writer.close(log=self.mock_logger)

    info_lines = self.mock_logger._log_calls['INFO']
    self.assertEqual(1, len(info_lines))
    self.assertRegexpMatches(info_lines[0], 'destination.bam')
def test_check_input_bam_not_deduped_raisesUsageError(self):
    """A BAM whose @PG program is ``connor`` is rejected when not forced."""
    sam_contents = '''@HD|VN:1.4|GO:none|SO:coordinate
@SQ|SN:chr10|LN:135534747
@PG|ID:foo|PN:connor
readNameA1|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>'''
    sam_contents = sam_contents.replace("|", "\t")
    expected_message = (r'\[.*input.bam\] has already been processed with Connor'
                        r'.*Are you sure.*force')
    with TempDirectory() as tmp_dir:
        bam_path = samtools_test.create_bam(tmp_dir.path,
                                            "input.sam",
                                            sam_contents,
                                            index=True)
        args = Namespace(input_bam=bam_path, force=False)
        with self.assertRaisesRegexp(utils.UsageError, expected_message):
            validator._check_input_bam_not_deduped(args)
def test_check_input_bam_paired_ok(self):
    """The paired check is silent when a flag-99 read follows a flag-16 read."""
    sam_template = '''@HD|VN:1.4|GO:none|SO:coordinate
@SQ|SN:chr10|LN:135534747
readNameA1|{unpaired_flag}|chr10|100|20|5M|=|300|200|AAAAA|>>>>>
readNameA1|{paired_flag}|chr10|100|20|5M|=|300|200|AAAAA|>>>>>'''
    sam_contents = sam_template.format(unpaired_flag='16',
                                       paired_flag='99').replace("|", "\t")
    with TempDirectory() as tmp_dir:
        bam_path = samtools_test.create_bam(tmp_dir.path,
                                            "input.sam",
                                            sam_contents,
                                            index=True)
        forced_args = Namespace(input_bam=bam_path, force=True)
        validator._check_input_bam_paired(forced_args, self.mock_logger)
        self.ok()
        self.assertEqual(0, len(self.mock_logger._log_calls))
def test_write_addsAlignTags(self):
    """Each BamTag's get_value result is written as a tag on every align.

    Three tags are configured, returning the family, the pair and the
    align's query name respectively; all nine resulting tag values are
    checked on the aligns read back from the file.
    """
    header = {'HD': {'VN': '1.0'},
              'SQ': [{'LN': 1575, 'SN': 'chr1'},
                     {'LN': 1584, 'SN': 'chr2'}]}
    align1 = ConnorAlign(mock_align(query_name='align1'))
    align2 = ConnorAlign(mock_align(query_name='align2'))
    align3 = ConnorAlign(mock_align(query_name='align3'))
    family_tag = BamTag('X1', 'Z', 'desc',
                        get_value=lambda family, pair, align: family)
    pair_tag = BamTag('X2', 'Z', 'desc',
                      get_value=lambda family, pair, align: pair)
    name_tag = BamTag('X3', 'Z', 'desc',
                      get_value=lambda family, pair, align: align.query_name)

    with TempDirectory() as tmp_dir:
        bam_path = os.path.join(tmp_dir.path, 'destination.bam')
        writer = samtools.AlignWriter(header,
                                      bam_path,
                                      [family_tag, pair_tag, name_tag])
        writer.write('familyA', 'pair1', align1)
        writer.write('familyB', 'pair2', align2)
        writer.write('familyC', 'pair3', align3)
        writer.close()

        bamfile = samtools.alignment_file(bam_path, 'rb')
        actual_aligns = list(bamfile.fetch())
        bamfile.close()

    # Render every tag found as "name:type:value", keyed by
    # (query name, tag name).
    fix_type = AlignWriterTest.fix_pysam_inconsistent_tag_type
    align_tags = {}
    for found in actual_aligns:
        for name, value, value_type in found.get_tags(with_value_type=True):
            rendered = "{}:{}:{}".format(name, fix_type(value_type), value)
            align_tags[(found.query_name, name)] = rendered

    self.assertEqual(3, len(actual_aligns))
    expected_tags = [
        (('align1', 'X1'), "X1:Z:familyA"),
        (('align2', 'X1'), "X1:Z:familyB"),
        (('align3', 'X1'), "X1:Z:familyC"),
        (('align1', 'X2'), "X2:Z:pair1"),
        (('align2', 'X2'), "X2:Z:pair2"),
        (('align3', 'X2'), "X2:Z:pair3"),
        (('align1', 'X3'), "X3:Z:align1"),
        (('align2', 'X3'), "X3:Z:align2"),
        (('align3', 'X3'), "X3:Z:align3"),
    ]
    for key, expected_value in expected_tags:
        self.assertEqual(expected_value, align_tags[key])
def test_check_input_bam_not_deduped_warnIfForced(self):
    """With force=True a ``connor`` @PG entry yields one warning, no error."""
    sam_contents = '''@HD|VN:1.4|GO:none|SO:coordinate
@SQ|SN:chr10|LN:135534747
@PG|ID:foo|PN:connor
readNameA1|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>'''
    sam_contents = sam_contents.replace("|", "\t")
    expected_warning = (r'\[.*input.bam\] has already been processed with Connor'
                        r'.*forcing')
    with TempDirectory() as tmp_dir:
        bam_path = samtools_test.create_bam(tmp_dir.path,
                                            "input.sam",
                                            sam_contents,
                                            index=True)
        forced_args = Namespace(input_bam=bam_path, force=True)
        validator._check_input_bam_not_deduped(forced_args,
                                               log=self.mock_logger)

    logged_warnings = self.mock_logger._log_calls['WARNING']
    self.assertEqual(1, len(logged_warnings))
    self.assertRegexpMatches(logged_warnings[0], expected_warning)
def test_check_input_bam_paired_warnIfForced(self):
    """With force=True a flag-16-only BAM yields one warning, no error."""
    sam_template = '''@HD|VN:1.4|GO:none|SO:coordinate
@SQ|SN:chr10|LN:135534747
readNameA1|{flag}|chr10|100|20|5M|=|300|200|AAAAA|>>>>>'''
    sam_contents = sam_template.format(flag='16')
    sam_contents = sam_contents.replace("|", "\t")
    expected_warning = (r'\[.*input.bam\] does not appear to contain paired '
                        r'reads.*forcing')
    with TempDirectory() as tmp_dir:
        bam_path = samtools_test.create_bam(tmp_dir.path,
                                            "input.sam",
                                            sam_contents,
                                            index=True)
        forced_args = Namespace(input_bam=bam_path, force=True)
        validator._check_input_bam_paired(forced_args, self.mock_logger)

    logged_warnings = self.mock_logger._log_calls['WARNING']
    self.assertEqual(1, len(logged_warnings))
    self.assertRegexpMatches(logged_warnings[0], expected_warning)
def test_check_input_bam_consistent_length_warnIfForced(self):
    """With force=True, mixed read lengths yield one warning, no error.

    The flag-99 reads in the fixture are 10, 8 and then 5 bases long; the
    flag-147 reads are 10, 5 and then 3 bases long.
    """
    sam_contents = '''@HD|VN:1.4|GO:none|SO:coordinate
@SQ|SN:chr10|LN:135534747
readNameA1|99|chr10|100|20|10M|=|300|200|AAAAANNNNN|>>>>>!!!!!
readNameA2|99|chr10|100|20|8M|=|300|200|AAAAANNN|>>>>>!!!
readNameA3|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>
readNameA4|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>
readNameA5|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>
readNameA6|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>
readNameA7|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>
readNameA8|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>
readNameA9|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>
readNameA0|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>
readNameA1|147|chr10|100|20|10M|=|300|200|AAAAANNNNN|>>>>>!!!!!
readNameA2|147|chr10|100|20|5M|=|300|200|AAANN|>>>!!
readNameA3|147|chr10|100|20|3M|=|300|200|AAA|>>>
readNameA4|147|chr10|100|20|3M|=|300|200|AAA|>>>
readNameA5|147|chr10|100|20|3M|=|300|200|AAA|>>>
readNameA6|147|chr10|100|20|3M|=|300|200|AAA|>>>
readNameA7|147|chr10|100|20|3M|=|300|200|AAA|>>>
readNameA8|147|chr10|100|20|3M|=|300|200|AAA|>>>
readNameA9|147|chr10|100|20|3M|=|300|200|AAA|>>>
readNameA0|147|chr10|100|20|3M|=|300|200|AAA|>>>
'''.replace("|", "\t")
    expected_warning = (r'\[.*input.bam\] reads appear to have inconsistent '
                        r'sequence lengths\..*forcing')
    with TempDirectory() as tmp_dir:
        bam_path = samtools_test.create_bam(tmp_dir.path,
                                            "input.sam",
                                            sam_contents,
                                            index=True)
        forced_args = Namespace(input_bam=bam_path, force=True)
        validator._check_input_bam_consistent_length(forced_args,
                                                     self.mock_logger)

    self.ok()
    logged_warnings = self.mock_logger._log_calls['WARNING']
    self.assertEqual(1, len(logged_warnings))
    self.assertRegexpMatches(logged_warnings[0], expected_warning)
def test_total_align_count(self):
    """total_align_count returns 6 for this eight-record fixture.

    The fixture holds four 5M records, two flag-12 records placed on chr10
    and two records (flags 77 and 141) with no reference (``*``).
    """
    self.check_sysout_safe()
    sam_contents = '''@HD|VN:1.4|GO:none|SO:coordinate
@SQ|SN:chr10|LN:135534747
readNameB1|147|chr10|400|0|5M|=|200|100|CCCCC|>>>>>
readNameA1|147|chr10|300|0|5M|=|100|100|AAAAA|>>>>>
readNameA1|99|chr10|100|0|5M|=|300|200|AAAAA|>>>>>
readNameB1|99|chr10|200|0|5M|=|400|200|CCCCC|>>>>>
readNameC1|12|chr10|400|0|*|=|200|100|CCCCC|>>>>>
readNameC1|12|chr10|400|0|*|=|200|100|CCCCC|>>>>>
readNameZ1|77|*|0|0|*|*|0|0|TTTTT|>>>>>
readNameZ1|141|*|0|0|*|*|0|0|GGGGG|>>>>>
'''
    sam_contents = sam_contents.replace("|", "\t")
    with TempDirectory() as tmp_dir:
        bam_path = create_bam(tmp_dir.path,
                              'input.sam',
                              sam_contents,
                              index=False)
        samtools.sort_and_index_bam(bam_path)
        counted = samtools.total_align_count(bam_path)
        self.assertEqual(6, counted)
def test_write(self):
    """Aligns written with no family and no tags can be read back by name."""
    header = {'HD': {'VN': '1.0'},
              'SQ': [{'LN': 1575, 'SN': 'chr1'},
                     {'LN': 1584, 'SN': 'chr2'}]}
    aligns = [ConnorAlign(mock_align(query_name="align1")),
              ConnorAlign(mock_align(query_name="align2")),
              ConnorAlign(mock_align(query_name="align3"))]
    family = None

    with TempDirectory() as tmp_dir:
        destination = os.path.join(tmp_dir.path, "destination.bam")
        writer = samtools.AlignWriter(header, destination)
        for align in aligns:
            writer.write(family, None, align)
        writer.close()

        bamfile = samtools.alignment_file(destination, 'rb')
        actual_query_names = [found.query_name for found in bamfile.fetch()]
        bamfile.close()

    self.assertEqual(['align1', 'align2', 'align3'], actual_query_names)
def test_check_input_bam_consistent_length_negRaisesUsageError(self):
    """Mixed read lengths raise UsageError when force is False.

    The flag-99 reads in the fixture are 10 and then 5 bases long; the
    flag-147 reads are 10, 5 and then 3 bases long.
    """
    sam_contents = '''@HD|VN:1.4|GO:none|SO:coordinate
@SQ|SN:chr10|LN:135534747
readNameA1|99|chr10|100|20|10M|=|300|200|AAAAANNNNN|>>>>>!!!!!
readNameA2|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>
readNameA3|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>
readNameA4|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>
readNameA5|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>
readNameA6|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>
readNameA7|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>
readNameA8|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>
readNameA9|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>
readNameA0|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>
readNameA1|147|chr10|100|20|10M|=|300|200|AAAAANNNNN|>>>>>!!!!!
readNameA2|147|chr10|100|20|5M|=|300|200|AAANN|>>>!!
readNameA3|147|chr10|100|20|3M|=|300|200|AAA|>>>
readNameA4|147|chr10|100|20|3M|=|300|200|AAA|>>>
readNameA5|147|chr10|100|20|3M|=|300|200|AAA|>>>
readNameA6|147|chr10|100|20|3M|=|300|200|AAA|>>>
readNameA7|147|chr10|100|20|3M|=|300|200|AAA|>>>
readNameA8|147|chr10|100|20|3M|=|300|200|AAA|>>>
readNameA9|147|chr10|100|20|3M|=|300|200|AAA|>>>
readNameA0|147|chr10|100|20|3M|=|300|200|AAA|>>>
'''.replace("|", "\t")
    expected_message = (r'\[.*input.bam\] reads appear to have inconsistent '
                        r'sequence lengths\..*force')
    with TempDirectory() as tmp_dir:
        bam_path = samtools_test.create_bam(tmp_dir.path,
                                            "input.sam",
                                            sam_contents,
                                            index=True)
        args = Namespace(input_bam=bam_path, force=False)
        with self.assertRaisesRegexp(UsageError, expected_message):
            validator._check_input_bam_consistent_length(args)