Exemplo n.º 1
0
 def run(self):
     """
     Compute one ratio per alignment and hand the results to dumpValueDict.

     For every (id, alignment) pair in self.alignmentDict the ratio is
     (matches + repMatches) over
     (matches + repMatches + misMatches + qNumInsert).
     """
     # NOTE(review): presumably this call populates self.alignmentDict - confirm.
     self.getAlignmentDict()
     valueDict = {}
     for aId, aln in self.alignmentDict.iteritems():
         matched = aln.matches + aln.repMatches
         aligned = matched + aln.misMatches + aln.qNumInsert
         valueDict[aId] = format_ratio(matched, aligned)
     self.dumpValueDict(valueDict)
Exemplo n.º 2
0
def bam_is_paired(path, num_reads=100000, paired_cutoff=0.75):
    """
    Infers the paired-ness of a bam file.

    At most ``num_reads`` records are examined from the start of the file and
    the fraction flagged as paired is compared against ``paired_cutoff``. The
    fraction is taken over the records actually examined, so files holding
    fewer than ``num_reads`` records are judged on what they contain.

    :param path: path of the bam file to open with pysam.
    :param num_reads: maximum number of records to examine.
    :param paired_cutoff: fraction above which the file is called paired;
        below ``1 - paired_cutoff`` it is called unpaired.
    :return: True if the file looks paired, False if it looks unpaired.
    :raises RuntimeError: if the fraction falls between the two thresholds,
        or if no records could be examined at all.
    """
    sam = pysam.Samfile(path)
    examined = 0
    paired = 0
    try:
        for rec in sam:
            # Test the limit before counting so exactly num_reads records
            # contribute to the tally (never num_reads + 1).
            if examined >= num_reads:
                break
            examined += 1
            if rec.is_paired:
                paired += 1
    finally:
        # Release the file handle even if iteration fails part-way.
        sam.close()
    if examined:
        ratio = format_ratio(paired, examined)
        if ratio > paired_cutoff:
            return True
        if ratio < 1 - paired_cutoff:
            return False
    raise RuntimeError("Unable to infer pairing from bamfile {}".format(path))
Exemplo n.º 3
0
 def run(self):
     """
     Record, for each transcript, the percentage of "N" characters in its CDS.

     Results are keyed by alignment id and passed to
     dump_attribute_results_to_disk.
     """
     # NOTE(review): presumably this call prepares self.seq_dict - confirm.
     self.get_fasta()
     percent_n_by_aln = {}
     for aln_id, transcript in self.transcript_iterator():
         coding_seq = transcript.get_cds(self.seq_dict)
         n_fraction = format_ratio(coding_seq.count("N"), len(coding_seq))
         percent_n_by_aln[aln_id] = 100 * n_fraction
     self.dump_attribute_results_to_disk(percent_n_by_aln)
def has_only_short(bins,
                   ids_included,
                   ref_interval,
                   tgt_intervals,
                   percentage_of_ref=60.0):
    """
    Are all of the consensus transcripts we found for this gene too short?

    The first element of every value in ``bins`` is treated as an id. Ids
    that are not in ``ids_included`` are ignored. Each remaining id is looked
    up in ``tgt_intervals`` and its length is compared, as a percentage of
    ``len(ref_interval)``, against ``percentage_of_ref``.

    :param bins: mapping whose values are sequences starting with an id.
    :param ids_included: container of ids that should be considered.
    :param ref_interval: sized object giving the reference length.
    :param tgt_intervals: mapping from id to a sized target interval.
    :param percentage_of_ref: threshold, in percent of the reference length.
    :return: True if every considered target is below the threshold. This is
        also the result when there is nothing to consider (empty ``bins`` or
        no included ids).
    """
    source_size = len(ref_interval)
    # Transpose the bin values to get the column of ids. Materialising the
    # zip keeps this working where zip() returns an iterator, and the guard
    # avoids an IndexError when bins is empty.
    columns = list(zip(*bins.values()))
    first_ids = columns[0] if columns else ()
    tgt_sizes = [
        len(tgt_intervals[x]) for x in first_ids
        if x in ids_included
    ]
    return all([
        100 * format_ratio(tgt_size, source_size) < percentage_of_ref
        for tgt_size in tgt_sizes
    ])
Exemplo n.º 5
0
 def coverage(self):
     """
     Return 100 times the ratio of (matches + mismatches + repmatches) to q_size.
     """
     aligned_bases = self.matches + self.mismatches + self.repmatches
     return 100 * format_ratio(aligned_bases, self.q_size)
Exemplo n.º 6
0
 def percent_n(self):
     """
     Return 100 times the ratio of n_count to q_size.
     """
     n_fraction = format_ratio(self.n_count, self.q_size)
     return 100 * n_fraction
Exemplo n.º 7
0
 def target_coverage(self):
     """
     Return 100 times the ratio of (matches + mismatches + repmatches) to t_size.
     """
     aligned_bases = self.matches + self.mismatches + self.repmatches
     return 100 * format_ratio(aligned_bases, self.t_size)
Exemplo n.º 8
0
 def identity(self):
     """
     Return 100 times the ratio of (matches + repmatches) to
     (matches + repmatches + mismatches + q_num_insert).
     """
     matched = self.matches + self.repmatches
     compared = matched + self.mismatches + self.q_num_insert
     return 100 * format_ratio(matched, compared)
Exemplo n.º 9
0
def identity(p_list):
    """
    Return the pooled identity ratio over every record in p_list.

    The numerator is the summed matches + repMatches; the denominator adds
    the summed misMatches and qNumInsert to that.
    """
    def total(attr):
        # Sum one numeric attribute across all records.
        return sum(getattr(rec, attr) for rec in p_list)

    matched = total("matches")
    mismatched = total("misMatches")
    repeat_matched = total("repMatches")
    inserted = total("qNumInsert")
    numerator = matched + repeat_matched
    return format_ratio(numerator, numerator + mismatched + inserted)
Exemplo n.º 10
0
def coverage(p_list):
    """
    Return the pooled coverage ratio over every record in p_list.

    The numerator is the summed matches + misMatches + repMatches; the
    denominator is the qSize of the first record.
    """
    def total(attr):
        # Sum one numeric attribute across all records.
        return sum(getattr(rec, attr) for rec in p_list)

    matched = total("matches")
    mismatched = total("misMatches")
    repeat_matched = total("repMatches")
    query_size = p_list[0].qSize
    return format_ratio(matched + mismatched + repeat_matched, query_size)