Python AlignmentFile.count示例

编程语言: Python

命名空间/包名称: pysam

类/类型: AlignmentFile

方法/功能: count

hotexamples.com的示例: 2

Python AlignmentFile.count - 已找到 2 个示例。以下是从开源项目中提取的、评价最高的 pysam.AlignmentFile.count 实际 Python 用法示例。您可以为示例评分，以帮助我们提高示例质量。

常用方法

显示/隐藏

AlignmentFile(30)

fetch(30)

write(30)

close(7)

getrname(5)

next(5)

pileup(4)

has_index(3)

count(2)

count_coverage(2)

get_reference_name(2)

check_index(1)

get_index_statistics(1)

parse_region(1)

reset(1)

seek(1)

示例#1

显示文件

    def _extract_regions(self, sample):
        """
        Code to extract the coverage information for the regions listed
        in the BED file.
        """

        if self.regions is None:
            return sample

        # get the pileup

        bam = AlignmentFile(sample.sample_bam)
        region_counts = []

        for i in self.regions.index:

            chrom = self.regions.at[i, 0]
            start = int(self.regions.at[i, 1])
            end = int(self.regions.at[i, 2])

            count = bam.count(chrom, start, end)

            region_counts.append({
                'chrom': chrom,
                'start': start,
                'end': end,
                'count': count
            })

        if len(region_counts) > 0:
            region_counts = pd.DataFrame(region_counts)
            sample.region_counts = region_counts

        return sample

示例#2

显示文件

文件： bam_to_pid.py 项目： zjyzjjzmt/EukDetect

def main(argv):
    """Print per-reference read count, coverage and percent identity.

    The alignment file named by ``sys.argv[1]`` is scanned for the reference
    names that have at least one fetched read. The sequences of those
    references are loaded from the FASTA file named by ``sys.argv[2]``. For
    each reference, every position with non-zero depth (as reported by
    ``count_coverage``) whose reference base is ``A``, ``C``, ``G`` or ``T``
    is classified as:

    * correct   - every counted base equals the reference base;
    * incorrect - at least one counted base differs (a single mismatch makes
      the whole position incorrect).

    Positions with any other reference base (e.g. ``N``) are skipped.
    References whose total counted depth is zero produce no output row.

    Output is a tab-separated table on stdout with the columns Subject,
    Readcount, Correct_bases, Incorrect_bases, Total_bases, Subjlen,
    Coverage (percent of positions classified) and Percent_ID (percent of
    classified positions that are correct), both rounded to two decimals.

    Args:
        argv: Accepted for interface compatibility; not read by this
            function (see the note below).

    Raises:
        KeyError: If a reference observed in the alignment file has no
            record with the same id in the FASTA file.
    """
    # NOTE(review): the paths are taken from sys.argv, not from the ``argv``
    # parameter. Left as-is because the caller is not visible here; confirm
    # before switching to ``argv``.
    observed = []   # reference names in first-seen order (drives row order)
    seen = set()    # same names, for O(1) membership tests

    # Context manager guarantees the alignment file handle is closed.
    with AlignmentFile(sys.argv[1]) as bam:
        for read in bam.fetch():
            name = read.reference_name
            if name not in seen:
                seen.add(name)
                observed.append(name)

        ref_seqs = {}
        for seq in SeqIO.parse(sys.argv[2], 'fasta'):
            if seq.id in seen:
                ref_seqs[seq.id] = str(seq.seq)

        print(
            "Subject\tReadcount\tCorrect_bases\tIncorrect_bases\tTotal_bases\tSubjlen\tCoverage\tPercent_ID"
        )

        # Index of each reference base in the tuple returned by
        # count_coverage, which this code reads in the order A, C, G, T.
        base_index = {"A": 0, "C": 1, "G": 2, "T": 3}

        for o in observed:
            ref_seq = ref_seqs[o]
            seqlen = len(ref_seq)

            contig_counts = bam.count(o, start=0, end=seqlen)
            counts = bam.count_coverage(o, start=0, end=seqlen)

            trues = 0
            falses = 0
            total = 0
            for ref_pos, ref_allele in enumerate(ref_seq):
                per_base = [counts[nt][ref_pos] for nt in range(4)]
                depth = sum(per_base)
                total += depth
                if depth == 0:
                    continue

                idx = base_index.get(ref_allele)
                if idx is None:
                    # Not A/C/G/T (e.g. N): position is not classified.
                    continue

                # Absolute rule: one mismatching base marks the position
                # as incorrect.
                if depth - per_base[idx] > 0:
                    falses += 1
                else:
                    trues += 1

            # Nothing was counted on this reference: emit no row.
            if total == 0:
                continue

            classified = trues + falses
            coverage = round((classified / seqlen) * 100, 2)
            if classified == 0:
                pid = 0
            else:
                pid = round((trues / classified) * 100, 2)

            print('\t'.join(
                str(field) for field in (
                    o, contig_counts, trues, falses, classified, seqlen,
                    coverage, pid,
                )
            ))