def _extend_contigs_with_bam(self, bam_in, out_prefix=None, output_all_useful_reads=False):
    '''Extend every contig in self.contigs using the reads of a BAM file.

    The BAM records are consumed two at a time: each record is paired with
    the one that follows it (assumes mates are adjacent in the file -- TODO
    confirm against whatever produces bam_in). A trailing record with no
    partner is ignored. Each pair is classified by mapping.get_pair_type().
    Reads typed CAN_EXTEND_LEFT / CAN_EXTEND_RIGHT contribute the part of
    their sequence that hangs off the contig end as a left / right kmer of
    that contig. After all reads have been seen, extend() is called on every
    contig and the result is recorded in self.contig_lengths.

    Args:
        bam_in: name of the BAM file to read.
        out_prefix: if not None, reads whose type is in the "keep" set are
            written as FASTA to out_prefix + '_1.fa' (records flagged as
            read 1) and out_prefix + '_2.fa' (all other records).
        output_all_useful_reads: if True, reads typed BOTH_UNMAPPED are also
            written to the FASTA files.

    Returns:
        Total number of bases added, summed over both ends of all contigs.
    '''
    keep_read_types = {mapping.CAN_EXTEND_LEFT, mapping.CAN_EXTEND_RIGHT, mapping.KEEP}
    if output_all_useful_reads:
        keep_read_types.add(mapping.BOTH_UNMAPPED)

    fa_out1 = None
    fa_out2 = None
    sam_reader = None

    # Everything that holds an open handle lives inside the try block so that
    # the BAM reader and the FASTA files are released even if reading fails.
    try:
        if out_prefix is not None:
            fa_out1 = pyfastaq.utils.open_file_write(out_prefix + '_1.fa')
            fa_out2 = pyfastaq.utils.open_file_write(out_prefix + '_2.fa')

        sam_reader = pysam.Samfile(bam_in, "rb")
        previous_sam = None

        for current_sam in sam_reader.fetch(until_eof=True):
            # Hold on to the first record of a pair until its partner arrives.
            if previous_sam is None:
                previous_sam = current_sam
                continue

            previous_type, current_type = mapping.get_pair_type(
                previous_sam,
                current_sam,
                self._get_ref_length_sam_pair(sam_reader, previous_sam, current_sam),
                self.max_insert,
                min_clip=self.min_clip,
            )

            for sam, sam_type in [(previous_sam, previous_type), (current_sam, current_type)]:
                if sam_type == mapping.CAN_EXTEND_LEFT:
                    name = mapping.get_ref_name(sam, sam_reader)
                    # First element of soft_clipped() is used as the number of
                    # leading read bases to take as the left-hand kmer.
                    clipped = mapping.soft_clipped(sam)[0]
                    self.contigs[name].add_left_kmer(common.decode(sam.seq[:clipped]))
                elif sam_type == mapping.CAN_EXTEND_RIGHT:
                    name = mapping.get_ref_name(sam, sam_reader)
                    # Everything from sam.qend to the end of the read is the
                    # right-hand kmer.
                    self.contigs[name].add_right_kmer(common.decode(sam.seq[sam.qend:]))

                if out_prefix is not None and sam_type in keep_read_types:
                    if sam.is_read1:
                        print(mapping.sam_to_fasta(sam), file=fa_out1)
                    else:
                        print(mapping.sam_to_fasta(sam), file=fa_out2)

            # Pair consumed: the next record starts a new pair.
            previous_sam = None
    finally:
        for fa_out in (fa_out1, fa_out2):
            if fa_out is not None:
                pyfastaq.utils.close(fa_out)
        if sam_reader is not None:
            sam_reader.close()

    total_bases_added = 0

    for ctg in self.contigs:
        left_length, right_length = self.contigs[ctg].extend(self.ext_min_cov, self.ext_min_ratio, self.ext_bases)
        if self.verbose:
            print(' extend contig ' + ctg, 'new_length:' + str(len(self.contigs[ctg])), 'added_left:' + str(left_length), 'added_right:' + str(right_length), sep='\t')
        # One history entry per call: [length after extension, bases added left, bases added right].
        self.contig_lengths[ctg].append([len(self.contigs[ctg]), left_length, right_length])
        total_bases_added += left_length + right_length

    return total_bases_added
def test_get_ref_name(self):
    '''Test get_ref_name

    Reads every record of mapping_test.smalt.out.bam and checks the name
    returned by mapping.get_ref_name(): records 5, 10 and 11 (0-based) are
    expected to give None, every other record 'ref'.
    '''
    none_indexes = (5, 10, 11)
    expected = [None if idx in none_indexes else 'ref' for idx in range(14)]
    bam_path = os.path.join(data_dir, 'mapping_test.smalt.out.bam')
    sam_reader = pysam.Samfile(bam_path, "rb")

    for idx, sam in enumerate(sam_reader.fetch(until_eof=True)):
        self.assertEqual(mapping.get_ref_name(sam, sam_reader), expected[idx])
def _get_ref_length(self, samfile, sam):
    '''Return the length of the contig that sam is mapped to.

    The contig is looked up in self.contigs under the name given by
    mapping.get_ref_name(sam, samfile). Returns None when sam is unmapped.
    '''
    # Guard clause: an unmapped record has no reference to measure.
    if sam.is_unmapped:
        return None

    ref_name = mapping.get_ref_name(sam, samfile)
    return len(self.contigs[ref_name])