def __call__(self, seqrecords):
    """Return the seqrecords whose mean phred quality passes the threshold.

    For every seqrecord the mean of its 'phred_quality' letter annotation
    is compared with self.threshold; records with a mean greater than or
    equal to the threshold are kept.  When self.reverse is true the
    decision is inverted.  When self.ignore_masked is true only the
    qualities located inside the segments reported by
    get_uppercase_segments are averaged.

    The counters PROCESSED_PACKETS, PROCESSED_SEQS and YIELDED_SEQS of
    self._stats are updated along the way.

    Raises WrongFormatError if a seqrecord has no 'phred_quality'
    annotation.
    """
    stats = self._stats
    threshold = self.threshold
    reverse = self.reverse
    ignore_masked = self.ignore_masked

    stats[PROCESSED_PACKETS] += 1
    processed_seqs = []
    for seqrecord in seqrecords:
        stats[PROCESSED_SEQS] += 1
        try:
            quals = seqrecord.letter_annotations['phred_quality']
        except KeyError:
            msg = 'Some of the input sequences do not have qualities: {}'
            msg = msg.format(seqrecord.id)
            raise WrongFormatError(msg)

        if ignore_masked:
            # Segment limits are inclusive, hence the + 1 in the slice.
            considered = []
            for start, end in get_uppercase_segments(str(seqrecord.seq)):
                considered.extend(quals[start:end + 1])
        else:
            considered = quals

        # The mean is taken over the considered positions only.  The
        # previous formula multiplied every segment sum by its length and
        # divided by the full read length, so it was not a mean at all.
        n_considered = len(considered)
        # With nothing to average (empty or fully masked read) the quality
        # is taken as 0 instead of dividing by zero.
        qual = sum(considered) / n_considered if n_considered else 0

        passed = qual >= threshold
        if reverse:
            passed = not passed
        if passed:
            processed_seqs.append(seqrecord)
            stats[YIELDED_SEQS] += 1
    return processed_seqs
def _do_check(self, seq):
    """Tell whether the mean phred quality of seq reaches self.threshold.

    The qualities are read from the 'phred_quality' letter annotation of
    seq.object.  When self.ignore_masked is true only the qualities
    located inside the segments reported by get_uppercase_segments are
    averaged.

    Returns True when the mean is greater than or equal to
    self.threshold, False otherwise.

    Raises WrongFormatError if the sequence has no 'phred_quality'
    annotation.
    """
    seq_object = seq.object
    try:
        quals = seq_object.letter_annotations['phred_quality']
    except KeyError:
        msg = 'Some of the input sequences do not have qualities: {}'
        msg = msg.format(get_name(seq))
        raise WrongFormatError(msg)

    if self.ignore_masked:
        # Segment limits are inclusive, hence the + 1 in the slice.
        considered = []
        for start, end in get_uppercase_segments(str(seq_object.seq)):
            considered.extend(quals[start:end + 1])
    else:
        considered = quals

    # The mean is taken over the considered positions only.  The previous
    # formula multiplied every segment sum by its length and divided by
    # the full read length, so it was not a mean at all.
    n_considered = len(considered)
    # With nothing to average (empty or fully masked read) the quality is
    # taken as 0 instead of dividing by zero.
    qual = sum(considered) / n_considered if n_considered else 0
    return bool(qual >= self.threshold)
def test_masked_locations():
    """Check the uppercase segments reported for a set of sequences.

    The expected segments are (start, end) pairs of zero-based positions,
    both ends included.
    """
    expected_segments = [
        ("aaATTTTTTaa", [(2, 8)]),
        ("aaATTTaTTaa", [(2, 5), (7, 8)]),
        ("AAATaaa", [(0, 3)]),
        ("aaaaAAAA", [(4, 7)]),
        ("AATTaaTTaaTTT", [(0, 3), (6, 7), (10, 12)]),
        ("AATT", [(0, 3)]),
    ]
    for sequence, expected in expected_segments:
        assert list(get_uppercase_segments(sequence)) == expected

    # A sequence without any uppercase letter yields no segment at all.
    assert not list(get_uppercase_segments("aatt"))
def test_masked_locations():
    'It checks the uppercase segments found in several sequences.'

    def located(sequence):
        'It returns the reported segments as a list.'
        return list(get_uppercase_segments(sequence))

    # Segments are (start, end) pairs with both ends included.
    assert located('aaATTTTTTaa') == [(2, 8)]
    assert located('aaATTTaTTaa') == [(2, 5), (7, 8)]
    assert located('AAATaaa') == [(0, 3)]
    assert located('aaaaAAAA') == [(4, 7)]

    three_blocks = 'AATTaaTTaaTTT'
    assert located(three_blocks) == [(0, 3), (6, 7), (10, 12)]

    # Fully uppercase and fully lowercase sequences.
    assert located('AATT') == [(0, 3)]
    assert not located('aatt')
def _do_trim(self, seq):
    """Register for trimming everything outside the longest uppercase segment.

    The longest of the segments reported by get_uppercase_segments is the
    region to keep.  The stretches before and after it are handed to
    _add_trim_segments as inclusive (start, end) pairs with kind=OTHER.
    When get_longest_segment returns None (presumably because the
    sequence has no uppercase letter) the whole sequence is registered.

    Returns the same seq object that was received.
    """
    str_seq = get_str_seq(seq)
    len_seq = len(str_seq)
    segment = get_longest_segment(get_uppercase_segments(str_seq))

    if segment is None:
        # Segment ends are inclusive everywhere else in this method, so the
        # last valid position is len_seq - 1.  The length is taken from
        # the sequence string, as in the other branch, not from the seq
        # wrapper.  max() keeps the (0, 0) segment for an empty sequence.
        segments = [(0, max(len_seq - 1, 0))]
    else:
        segments = []
        if segment[0] != 0:
            # Stretch located before the kept region.
            segments.append((0, segment[0] - 1))
        if segment[1] != len_seq - 1:
            # Stretch located after the kept region.
            segments.append((segment[1] + 1, len_seq - 1))

    _add_trim_segments(segments, seq, kind=OTHER)
    return seq
def __call__(self, seqs):
    """Register the masked ends of every sequence for trimming.

    For each sequence the longest segment reported by
    get_uppercase_segments is kept and the stretches around it are handed
    to _add_trim_segments as inclusive (start, end) pairs with kind=OTHER.
    Sequences for which get_longest_segment returns None are left out of
    the result.

    Returns the list of the sequences that were processed.
    """
    kept = []
    for seq in seqs:
        sequence = get_str_seq(seq)
        longest = get_longest_segment(get_uppercase_segments(sequence))
        if longest is None:
            continue

        first, last = longest[0], longest[1]
        final_index = len(sequence) - 1

        to_trim = []
        if first != 0:
            # Stretch located before the kept region.
            to_trim.append((0, first - 1))
        if last != final_index:
            # Stretch located after the kept region.
            to_trim.append((last + 1, final_index))

        _add_trim_segments(to_trim, seq, kind=OTHER)
        kept.append(seq)
    return kept
def __call__(self, seqrecords):
    """Register the masked ends of every seqrecord for trimming.

    For each seqrecord the longest segment reported by
    get_uppercase_segments is kept and the stretches around it are handed
    to _add_trim_segments as inclusive (start, end) pairs with kind=OTHER.
    Seqrecords for which get_longest_segment returns None are left out of
    the result and are not counted as yielded.

    The counters PROCESSED_PACKETS, PROCESSED_SEQS and YIELDED_SEQS of
    self._stats are updated along the way.

    Returns the list of the seqrecords that were processed.
    """
    counters = self._stats
    counters[PROCESSED_PACKETS] += 1

    kept = []
    for seqrecord in seqrecords:
        counters[PROCESSED_SEQS] += 1
        longest = get_longest_segment(
            get_uppercase_segments(str(seqrecord.seq)))
        if longest is None:
            continue

        counters[YIELDED_SEQS] += 1
        first, last = longest[0], longest[1]
        final_index = len(seqrecord) - 1

        to_trim = []
        if first != 0:
            # Stretch located before the kept region.
            to_trim.append((0, first - 1))
        if last != final_index:
            # Stretch located after the kept region.
            to_trim.append((last + 1, final_index))

        _add_trim_segments(to_trim, seqrecord, kind=OTHER)
        # The record itself is appended, not a slice of it.
        kept.append(seqrecord)
    return kept