Пример #1
0
    def _extend_contigs_with_bam(self, bam_in, out_prefix=None, output_all_useful_reads=False):
        '''Extend the contigs using the reads stored in a BAM file.

        Records are taken from bam_in two at a time and each consecutive pair
        is classified by mapping.get_pair_type(). A read typed CAN_EXTEND_LEFT
        or CAN_EXTEND_RIGHT contributes part of its sequence to the left or
        right kmers of the contig it is mapped to. Once the whole file has
        been read, every contig in self.contigs is extended and its new
        length is appended to self.contig_lengths.

        bam_in: name of the BAM file to read.
        out_prefix: if not None, reads whose type is in the "keep" set are
            written as FASTA to <out_prefix>_1.fa (reads flagged is_read1)
            and <out_prefix>_2.fa (all other reads).
        output_all_useful_reads: if True, reads typed BOTH_UNMAPPED are added
            to the "keep" set.

        Returns the sum, over all contigs, of the left and right lengths
        reported by each contig's extend() call.
        '''
        keep_read_types = {mapping.CAN_EXTEND_LEFT, mapping.CAN_EXTEND_RIGHT, mapping.KEEP}
        if output_all_useful_reads:
            keep_read_types.add(mapping.BOTH_UNMAPPED)

        fa_out1 = None
        fa_out2 = None
        sam_reader = None

        # Everything opened below is released in the finally clause, so an
        # error part-way through the BAM does not leak file handles.
        try:
            if out_prefix is not None:
                fa_out1 = pyfastaq.utils.open_file_write(out_prefix + '_1.fa')
                fa_out2 = pyfastaq.utils.open_file_write(out_prefix + '_2.fa')

            sam_reader = pysam.Samfile(bam_in, "rb")
            previous_sam = None

            for current_sam in sam_reader.fetch(until_eof=True):
                # Hold on to the first record of a pair until its partner arrives
                if previous_sam is None:
                    previous_sam = current_sam
                    continue

                pair_ref_length = self._get_ref_length_sam_pair(sam_reader, previous_sam, current_sam)
                previous_type, current_type = mapping.get_pair_type(previous_sam, current_sam, pair_ref_length, self.max_insert, min_clip=self.min_clip)

                for sam, sam_type in [(previous_sam, previous_type), (current_sam, current_type)]:
                    if sam_type == mapping.CAN_EXTEND_LEFT:
                        name = mapping.get_ref_name(sam, sam_reader)
                        # presumably the number of soft-clipped bases at the
                        # start of the read - verify against mapping.soft_clipped
                        clipped = mapping.soft_clipped(sam)[0]
                        self.contigs[name].add_left_kmer(common.decode(sam.seq[:clipped]))
                    elif sam_type == mapping.CAN_EXTEND_RIGHT:
                        name = mapping.get_ref_name(sam, sam_reader)
                        self.contigs[name].add_right_kmer(common.decode(sam.seq[sam.qend:]))

                    if out_prefix is not None and sam_type in keep_read_types:
                        fa_out = fa_out1 if sam.is_read1 else fa_out2
                        print(mapping.sam_to_fasta(sam), file=fa_out)

                # Pair finished: the next record starts a new pair
                previous_sam = None

            # NOTE(review): if the BAM holds an odd number of records, the last
            # one is still in previous_sam here and is ignored - confirm that
            # this is intended.
        finally:
            if sam_reader is not None:
                sam_reader.close()
            for fa_out in (fa_out1, fa_out2):
                if fa_out is not None:
                    pyfastaq.utils.close(fa_out)

        total_bases_added = 0

        for ctg in self.contigs:
            left_length, right_length = self.contigs[ctg].extend(self.ext_min_cov, self.ext_min_ratio, self.ext_bases)
            if self.verbose:
                print('    extend contig ' +  ctg, 'new_length:' + str(len(self.contigs[ctg])), 'added_left:' + str(left_length), 'added_right:' + str(right_length), sep='\t')
            self.contig_lengths[ctg].append([len(self.contigs[ctg]), left_length, right_length])
            total_bases_added += left_length + right_length

        return total_bases_added
Пример #2
0
    def _extend_contigs_with_bam(self, bam_in, out_prefix=None, output_all_useful_reads=False):
        '''Extend the contigs using the reads stored in a BAM file.

        Records are taken from bam_in two at a time and each consecutive pair
        is classified by mapping.get_pair_type(). A read typed CAN_EXTEND_LEFT
        or CAN_EXTEND_RIGHT contributes part of its sequence to the left or
        right kmers of the contig it is mapped to. Once the whole file has
        been read, every contig in self.contigs is extended and its new
        length is appended to self.contig_lengths.

        bam_in: name of the BAM file to read.
        out_prefix: if not None, reads whose type is in the "keep" set are
            written as FASTA to <out_prefix>_1.fa (reads flagged is_read1)
            and <out_prefix>_2.fa (all other reads).
        output_all_useful_reads: if True, reads typed BOTH_UNMAPPED are added
            to the "keep" set.

        Returns the sum, over all contigs, of the left and right lengths
        reported by each contig's extend() call.
        '''
        keep_read_types = {mapping.CAN_EXTEND_LEFT, mapping.CAN_EXTEND_RIGHT, mapping.KEEP}
        if output_all_useful_reads:
            keep_read_types.add(mapping.BOTH_UNMAPPED)

        fa_out1 = None
        fa_out2 = None
        sam_reader = None

        # Everything opened below is released in the finally clause, so an
        # error part-way through the BAM does not leak file handles.
        try:
            if out_prefix is not None:
                fa_out1 = pyfastaq.utils.open_file_write(out_prefix + '_1.fa')
                fa_out2 = pyfastaq.utils.open_file_write(out_prefix + '_2.fa')

            sam_reader = pysam.Samfile(bam_in, "rb")
            previous_sam = None

            for current_sam in sam_reader.fetch(until_eof=True):
                # Hold on to the first record of a pair until its partner arrives
                if previous_sam is None:
                    previous_sam = current_sam
                    continue

                pair_ref_length = self._get_ref_length_sam_pair(sam_reader, previous_sam, current_sam)
                previous_type, current_type = mapping.get_pair_type(previous_sam, current_sam, pair_ref_length, self.max_insert, min_clip=self.min_clip)

                for sam, sam_type in [(previous_sam, previous_type), (current_sam, current_type)]:
                    if sam_type == mapping.CAN_EXTEND_LEFT:
                        name = mapping.get_ref_name(sam, sam_reader)
                        # presumably the number of soft-clipped bases at the
                        # start of the read - verify against mapping.soft_clipped
                        clipped = mapping.soft_clipped(sam)[0]
                        self.contigs[name].add_left_kmer(common.decode(sam.seq[:clipped]))
                    elif sam_type == mapping.CAN_EXTEND_RIGHT:
                        name = mapping.get_ref_name(sam, sam_reader)
                        self.contigs[name].add_right_kmer(common.decode(sam.seq[sam.qend:]))

                    if out_prefix is not None and sam_type in keep_read_types:
                        fa_out = fa_out1 if sam.is_read1 else fa_out2
                        print(mapping.sam_to_fasta(sam), file=fa_out)

                # Pair finished: the next record starts a new pair
                previous_sam = None

            # NOTE(review): if the BAM holds an odd number of records, the last
            # one is still in previous_sam here and is ignored - confirm that
            # this is intended.
        finally:
            if sam_reader is not None:
                sam_reader.close()
            for fa_out in (fa_out1, fa_out2):
                if fa_out is not None:
                    pyfastaq.utils.close(fa_out)

        total_bases_added = 0

        for ctg in self.contigs:
            left_length, right_length = self.contigs[ctg].extend(self.ext_min_cov, self.ext_min_ratio, self.ext_bases)
            if self.verbose:
                print('    extend contig ' +  ctg, 'new_length:' + str(len(self.contigs[ctg])), 'added_left:' + str(left_length), 'added_right:' + str(right_length), sep='\t')
            self.contig_lengths[ctg].append([len(self.contigs[ctg]), left_length, right_length])
            total_bases_added += left_length + right_length

        return total_bases_added
Пример #3
0
 def test_get_ref_name(self):
     '''Test get_ref_name

     Walks the records of mapping_test.smalt.out.bam in file order and checks
     the name returned for each one against a fixed list of expected values.
     '''
     # Every record is expected to report 'ref', apart from the records at
     # (0-based) positions 5, 10 and 11, which are expected to report None.
     expected = ['ref'] * 14
     for none_index in (5, 10, 11):
         expected[none_index] = None

     sam_reader = pysam.Samfile(os.path.join(data_dir, 'mapping_test.smalt.out.bam'), "rb")
     # Close the reader even when an assertion fails, so the test does not
     # leave the BAM file handle open.
     try:
         # NOTE(review): a BAM with fewer records than len(expected) would
         # still pass - consider also asserting the record count.
         for i, sam in enumerate(sam_reader.fetch(until_eof=True)):
             self.assertEqual(mapping.get_ref_name(sam, sam_reader), expected[i])
     finally:
         sam_reader.close()
Пример #4
0
 def _get_ref_length(self, samfile, sam):
     if sam.is_unmapped:
         return None
     else:
         return len(self.contigs[mapping.get_ref_name(sam, samfile)])
Пример #5
0
 def _get_ref_length(self, samfile, sam):
     if sam.is_unmapped:
         return None
     else:
         return len(self.contigs[mapping.get_ref_name(sam, samfile)])