def _process_read(self, read): while len(self.stats["positions"]) < len(read[1]): self.stats["positions"].append(deepcopy(self.initial_dict)) for pos in range(len(read[1])): base = read[1][pos] score = pyngsqc.get_qual_from_phred(read[3][pos], self.qual_offset) self.stats["positions"][pos]["scores"][score] += 1 try: self.stats["positions"][pos]["bases"][base] += 1 except KeyError: self.stats["positions"][pos]["bases"]["N"] += 1
def _process_read(self, read): while len(self.stats["positions"]) < len(read[1]): self.stats["positions"].append(deepcopy(self.initial_dict)) for pos in xrange(len(read[1])): base = read[1][pos] score = pyngsqc.get_qual_from_phred(read[3][pos], self.qual_offset) self.stats["positions"][pos]["scores"][score] += 1 try: self.stats["positions"][pos]["bases"][base] += 1 except KeyError: self.stats["positions"][pos]["bases"]["N"] += 1
def trim_read(self, read):
    """Trim trailing N / low-quality bases from the end of ``read``.

    ``read[1]`` is the base sequence and ``read[3]`` the matching quality
    characters. Scanning backwards from the last base, a base is dropped
    when ``self.remove_trailing_Ns`` is set and the base is ``N``
    (case-insensitive), or when its decoded quality is below
    ``self.qual_threshold``. Scanning stops at the first base that passes
    both checks, so only the trailing run is removed and interior bases
    never cause good bases at the end to be cut.

    ``read`` is modified in place when anything is trimmed.

    Returns ``()`` when the trimmed sequence is shorter than
    ``self.min_length``; otherwise returns ``read`` itself.
    """
    bases = read[1]
    quals = read[3]

    keep = len(bases)
    while keep > 0:
        last = keep - 1
        if self.remove_trailing_Ns and bases[last].upper() == "N":
            keep = last
        elif (pyngsqc.get_qual_from_phred(quals[last], self.qual_offset)
                < self.qual_threshold):
            keep = last
        else:
            # First acceptable base from the end: trimming is finished.
            break

    trimmed = len(bases) - keep
    if trimmed:
        # Drop the same number of trailing entries from bases and scores.
        read[1] = bases[:-trimmed]
        read[3] = quals[:-trimmed]

    if len(read[1]) < self.min_length:
        return ()
    return read
def _passes_qc(read): qual = read[3] read_len = len(qual) low_scores = 0 for p in qual: if pyngsqc.get_qual_from_phred(p, self.qual_offset) < self.qual_threshold: low_scores += 1 this_pass_rate = 1.0 - float(low_scores) / float(read_len) if this_pass_rate <= self.pass_rate: return False elif self.max_Ns != -1 and int(pyngsqc.num_Ns_in_read(read)) > self.max_Ns: return False else: return True
def _passes_qc(read): qual = read[3] read_len = len(qual) low_scores = 0 for p in qual: if pyngsqc.get_qual_from_phred(p, self.qual_offset) <\ self.qual_threshold: low_scores += 1 this_pass_rate = 1.0 - float(low_scores) / float(read_len) if this_pass_rate <= self.pass_rate: return False elif self.max_Ns != -1 and \ int(pyngsqc.num_Ns_in_read(read)) > self.max_Ns: return False else: return True