def run(self):
    """
    Compute one ratio per alignment and hand the results to dumpValueDict.

    For each entry of self.alignmentDict the numerator is
    matches + repMatches and the denominator additionally includes
    misMatches and qNumInsert; the ratio itself is produced by format_ratio.
    """
    self.getAlignmentDict()
    ratios_by_id = {}
    for alignment_id, alignment in self.alignmentDict.iteritems():
        matched = alignment.matches + alignment.repMatches
        compared = matched + alignment.misMatches + alignment.qNumInsert
        ratios_by_id[alignment_id] = format_ratio(matched, compared)
    self.dumpValueDict(ratios_by_id)
def bam_is_paired(path, num_reads=100000, paired_cutoff=0.75):
    """
    Infers the paired-ness of a bam file.

    Examines at most num_reads records from the start of the file and
    compares the fraction flagged is_paired against paired_cutoff.

    :param path: path to the bam file, passed to pysam.Samfile.
    :param num_reads: maximum number of records to examine.
    :param paired_cutoff: fraction of examined records that must be paired
        for the file to be called paired; below 1 - paired_cutoff the file
        is called unpaired.
    :return: True if the file looks paired, False if it looks unpaired.
    :raises RuntimeError: if no records could be read, or if the paired
        fraction falls between the two thresholds.
    """
    sam = pysam.Samfile(path)
    try:
        examined = 0
        paired = 0
        for rec in sam:
            # Stop before counting so exactly num_reads records are examined.
            if examined >= num_reads:
                break
            examined += 1
            if rec.is_paired:
                paired += 1
    finally:
        # Always release the file handle, even if iteration fails.
        sam.close()

    if examined == 0:
        # Nothing to base a decision on.
        raise RuntimeError("Unable to infer pairing from bamfile {}".format(path))

    # Divide by the records actually seen so files shorter than num_reads
    # are not biased towards "unpaired".
    paired_fraction = format_ratio(paired, examined)
    if paired_fraction > paired_cutoff:
        return True
    if paired_fraction < 1 - paired_cutoff:
        return False
    raise RuntimeError("Unable to infer pairing from bamfile {}".format(path))
def run(self):
    """
    Compute, for every transcript, 100 times the format_ratio of "N"
    characters in its CDS to the CDS length, and write the results out
    through dump_attribute_results_to_disk.
    """
    self.get_fasta()
    n_content_by_aln = {}
    for alignment_id, transcript in self.transcript_iterator():
        cds_seq = transcript.get_cds(self.seq_dict)
        n_bases = cds_seq.count("N")
        n_content_by_aln[alignment_id] = 100 * format_ratio(n_bases, len(cds_seq))
    self.dump_attribute_results_to_disk(n_content_by_aln)
def has_only_short(bins, ids_included, ref_interval, tgt_intervals, percentage_of_ref=60.0):
    """
    Are all of the consensus transcripts we found for this gene too short?

    Takes the first element of every value in bins, keeps those present in
    ids_included, and returns True when each corresponding target interval
    length is below percentage_of_ref percent of the reference interval
    length (as computed via format_ratio).
    """
    reference_length = len(ref_interval)
    # Transposing the values and taking row 0 yields the first element of
    # each value in bins (relies on zip returning a list, i.e. Python 2).
    leading_ids = zip(*bins.itervalues())[0]
    candidate_lengths = []
    for tx_id in leading_ids:
        if tx_id in ids_included:
            candidate_lengths.append(len(tgt_intervals[tx_id]))
    percentages = [
        100 * format_ratio(length, reference_length)
        for length in candidate_lengths
    ]
    return all(pct < percentage_of_ref for pct in percentages)
def coverage(self):
    """
    Return 100 times the format_ratio of
    matches + mismatches + repmatches to q_size.
    """
    aligned = self.matches + self.mismatches + self.repmatches
    fraction = format_ratio(aligned, self.q_size)
    return 100 * fraction
def percent_n(self):
    """Return 100 times the format_ratio of n_count to q_size."""
    fraction = format_ratio(self.n_count, self.q_size)
    return 100 * fraction
def target_coverage(self):
    """
    Return 100 times the format_ratio of
    matches + mismatches + repmatches to t_size.
    """
    aligned = self.matches + self.mismatches + self.repmatches
    fraction = format_ratio(aligned, self.t_size)
    return 100 * fraction
def identity(self):
    """
    Return 100 times the format_ratio of matches + repmatches to
    matches + repmatches + mismatches + q_num_insert.
    """
    matched = self.matches + self.repmatches
    compared = matched + self.mismatches + self.q_num_insert
    fraction = format_ratio(matched, compared)
    return 100 * fraction
def identity(p_list):
    """
    Return the format_ratio of summed matches + repMatches to summed
    matches + repMatches + misMatches + qNumInsert over all items in p_list.
    """
    # One pass over p_list per field, in the same order as the field tuple.
    totals = {}
    for field in ("matches", "misMatches", "repMatches", "qNumInsert"):
        totals[field] = sum(getattr(row, field) for row in p_list)
    matched = totals["matches"] + totals["repMatches"]
    compared = matched + totals["misMatches"] + totals["qNumInsert"]
    return format_ratio(matched, compared)
def coverage(p_list):
    """
    Return the format_ratio of summed matches + misMatches + repMatches over
    all items in p_list to the qSize of the first item.
    """
    # One pass over p_list per field, in the same order as the field tuple.
    totals = {}
    for field in ("matches", "misMatches", "repMatches"):
        totals[field] = sum(getattr(row, field) for row in p_list)
    aligned = totals["matches"] + totals["misMatches"] + totals["repMatches"]
    # The denominator is taken from the first element only.
    return format_ratio(aligned, p_list[0].qSize)