示例#1
0
    def __call__(self, seqrecords):
        '''It filters the seqrecords according to their mean phred quality.

        A seqrecord is kept when its mean quality is equal to or higher than
        self.threshold. If self.reverse is set the outcome is inverted and
        the seqrecords below the threshold are the ones kept.

        If self.ignore_masked is set only the qualities located in the
        uppercase (unmasked) segments of the sequence are averaged.

        A seqrecord with no quality to average (empty, or completely masked
        when ignore_masked is set) is given a mean quality of 0.

        It returns a list with the seqrecords that passed the filter and it
        raises a WrongFormatError if a seqrecord has no 'phred_quality'
        letter annotation.
        '''
        stats = self._stats
        threshold = self.threshold
        reverse = self.reverse
        ignore_masked = self.ignore_masked

        stats[PROCESSED_PACKETS] += 1
        processed_seqs = []
        for seqrecord in seqrecords:
            stats[PROCESSED_SEQS] += 1
            try:
                quals = seqrecord.letter_annotations['phred_quality']
            except KeyError:
                msg = 'Some of the input sequences do not have qualities: {}'
                msg = msg.format(seqrecord.id)
                raise WrongFormatError(msg)
            if ignore_masked:
                seq = str(seqrecord.seq)
                # The segments are inclusive (start, end) pairs, so the
                # slice has to reach end + 1.
                scored_quals = []
                for start, end in get_uppercase_segments(seq):
                    scored_quals.extend(quals[start: end + 1])
            else:
                scored_quals = quals
            # Plain mean over the qualities taken into account. The masked
            # positions should not count in the numerator nor in the
            # denominator, otherwise the result would not be comparable
            # with the one obtained without ignore_masked.
            n_quals = len(scored_quals)
            qual = sum(scored_quals) / n_quals if n_quals else 0
            passed = qual >= threshold
            if reverse:
                passed = not passed
            if passed:
                processed_seqs.append(seqrecord)
                stats[YIELDED_SEQS] += 1
        return processed_seqs
示例#2
0
 def _do_check(self, seq):
     '''It checks whether the mean phred quality reaches the threshold.

     seq is a wrapper whose .object attribute holds the sequence record.
     If self.ignore_masked is set only the qualities located in the
     uppercase (unmasked) segments of the sequence are averaged.

     A sequence with no quality to average (empty, or completely masked
     when ignore_masked is set) is given a mean quality of 0.

     It returns True if the mean quality is equal to or higher than
     self.threshold and it raises a WrongFormatError if the sequence has
     no 'phred_quality' letter annotation.
     '''
     seq_object = seq.object
     try:
         quals = seq_object.letter_annotations['phred_quality']
     except KeyError:
         msg = 'Some of the input sequences do not have qualities: {}'
         msg = msg.format(get_name(seq))
         raise WrongFormatError(msg)
     if self.ignore_masked:
         str_seq = str(seq_object.seq)
         # The segments are inclusive (start, end) pairs, so the slice
         # has to reach end + 1.
         scored_quals = []
         for start, end in get_uppercase_segments(str_seq):
             scored_quals.extend(quals[start: end + 1])
     else:
         scored_quals = quals
     # Plain mean over the qualities taken into account. The masked
     # positions should not count in the numerator nor in the denominator,
     # otherwise the result would not be comparable with the one obtained
     # without ignore_masked.
     n_quals = len(scored_quals)
     qual = sum(scored_quals) / n_quals if n_quals else 0
     return bool(qual >= self.threshold)
示例#3
0
 def _do_check(self, seq):
     '''It tells if the mean phred quality of seq passes the threshold.

     The sequence record is taken from seq.object. When self.ignore_masked
     is set the lowercase (masked) positions are left out and the mean is
     calculated only with the qualities of the uppercase segments.

     When there is no quality to average (empty sequence, or everything
     masked with ignore_masked set) the mean quality is considered to be 0.

     It returns True if the mean quality is equal to or higher than
     self.threshold. A WrongFormatError is raised if the sequence lacks
     the 'phred_quality' letter annotation.
     '''
     seq_object = seq.object
     try:
         quals = seq_object.letter_annotations['phred_quality']
     except KeyError:
         msg = 'Some of the input sequences do not have qualities: {}'
         msg = msg.format(get_name(seq))
         raise WrongFormatError(msg)
     if self.ignore_masked:
         str_seq = str(seq_object.seq)
         # get_uppercase_segments yields inclusive (start, end) pairs.
         unmasked_quals = []
         for start, end in get_uppercase_segments(str_seq):
             unmasked_quals.extend(quals[start: end + 1])
         quals = unmasked_quals
     # Mean of the selected qualities: the masked positions are excluded
     # from both the sum and the count, so a sequence without masked
     # letters gets the same value with or without ignore_masked.
     n_quals = len(quals)
     qual = sum(quals) / n_quals if n_quals else 0
     return bool(qual >= self.threshold)
    def test_masked_locations():
        "It checks the uppercase segments found in several sequences"
        # Every case is a sequence and the inclusive (start, end) segments
        # expected for its uppercase stretches.
        cases = (
            ("aaATTTTTTaa", [(2, 8)]),
            ("aaATTTaTTaa", [(2, 5), (7, 8)]),
            ("AAATaaa", [(0, 3)]),
            ("aaaaAAAA", [(4, 7)]),
            ("AATTaaTTaaTTT", [(0, 3), (6, 7), (10, 12)]),
            ("AATT", [(0, 3)]),
            # A sequence completely lowercase has no segments at all.
            ("aatt", []),
        )
        for seq, expected in cases:
            assert list(get_uppercase_segments(seq)) == expected
示例#5
0
    def test_masked_locations():
        'It checks the uppercase segments found in several sequences'
        # Every case is a sequence and the inclusive (start, end) segments
        # expected for its uppercase stretches.
        cases = (
            ('aaATTTTTTaa', [(2, 8)]),
            ('aaATTTaTTaa', [(2, 5), (7, 8)]),
            ('AAATaaa', [(0, 3)]),
            ('aaaaAAAA', [(4, 7)]),
            ('AATTaaTTaaTTT', [(0, 3), (6, 7), (10, 12)]),
            ('AATT', [(0, 3)]),
            # A sequence completely lowercase has no segments at all.
            ('aatt', []),
        )
        for seq, expected in cases:
            assert list(get_uppercase_segments(seq)) == expected
示例#6
0
    def test_masked_locations():
        'It tests that the unmasked segments are correctly located'
        # Sequence -> inclusive (start, end) uppercase segments expected.
        expected_segments = [
            ('aaATTTTTTaa', [(2, 8)]),
            ('aaATTTaTTaa', [(2, 5), (7, 8)]),
            ('AAATaaa', [(0, 3)]),
            ('aaaaAAAA', [(4, 7)]),
            ('AATTaaTTaaTTT', [(0, 3), (6, 7), (10, 12)]),
            ('AATT', [(0, 3)]),
            # Nothing should be returned for an all lowercase sequence.
            ('aatt', []),
        ]
        for seq, segments in expected_segments:
            found = list(get_uppercase_segments(seq))
            assert found == segments
示例#7
0
文件: trim.py 项目: milw/seq_crumbs
    def _do_trim(self, seq):
        '''It marks for trimming everything but the longest unmasked segment.

        The longest uppercase segment of the sequence is kept and the
        regions located before and after it are registered as trim segments
        of kind OTHER. If the sequence has no uppercase segment the whole
        sequence is registered to be trimmed.

        All the segments are inclusive (start, end) pairs. It returns the
        given seq once _add_trim_segments has been called on it.
        '''
        str_seq = get_str_seq(seq)
        len_seq = len(str_seq)
        unmasked_segments = get_uppercase_segments(str_seq)
        segment = get_longest_segment(unmasked_segments)
        if segment is None:
            # Everything is masked. The segment ends are inclusive, so the
            # last position is len_seq - 1. The length is taken from the
            # sequence string and not from the seq wrapper. An empty
            # sequence has nothing to trim.
            segments = [(0, len_seq - 1)] if len_seq else []
        else:
            segments = []
            if segment[0] != 0:
                segments.append((0, segment[0] - 1))
            if segment[1] != len_seq - 1:
                segments.append((segment[1] + 1, len_seq - 1))
        _add_trim_segments(segments, seq, kind=OTHER)
        return seq
示例#8
0
    def _do_trim(self, seq):
        '''It registers the trim segments that flank the longest unmasked one.

        Only the longest uppercase segment of the sequence is left untouched:
        the regions before and after it are passed to _add_trim_segments
        with the kind OTHER. A sequence without any uppercase segment is
        registered to be trimmed from its first to its last position.

        The segments are inclusive (start, end) pairs. The seq received is
        the one returned.
        '''
        str_seq = get_str_seq(seq)
        len_seq = len(str_seq)
        last_pos = len_seq - 1
        segment = get_longest_segment(get_uppercase_segments(str_seq))
        segments = []
        if segment is not None:
            start, end = segment[0], segment[1]
            if start != 0:
                segments.append((0, start - 1))
            if end != last_pos:
                segments.append((end + 1, last_pos))
        elif len_seq:
            # Completely masked sequence. The end of a segment is inclusive,
            # so it has to be the last position and not the length, and the
            # length has to be the one of the sequence string, not the one
            # of the seq wrapper. Nothing is added for an empty sequence.
            segments.append((0, last_pos))
        _add_trim_segments(segments, seq, kind=OTHER)
        return seq
示例#9
0
文件: trim.py 项目: fastq/seq_crumbs
    def __call__(self, seqs):
        '''It marks for trimming the regions outside the longest unmasked one.

        For every seq the longest uppercase segment is looked for and the
        regions before and after it are registered as trim segments of kind
        OTHER. The seqs without any uppercase segment are not included in
        the returned list.
        '''
        kept = []
        for seq in seqs:
            str_seq = get_str_seq(seq)
            longest = get_longest_segment(get_uppercase_segments(str_seq))
            if longest is None:
                # Nothing unmasked in this seq, so it is left out.
                continue
            last_pos = len(str_seq) - 1
            # Inclusive (start, end) regions that flank the longest segment.
            flanks = []
            if longest[0] != 0:
                flanks.append((0, longest[0] - 1))
            if longest[1] != last_pos:
                flanks.append((longest[1] + 1, last_pos))

            _add_trim_segments(flanks, seq, kind=OTHER)
            kept.append(seq)
        return kept
示例#10
0
    def __call__(self, seqrecords):
        '''It marks for trimming the regions outside the longest unmasked one.

        For every seqrecord the longest uppercase segment is looked for and
        the regions before and after it are registered as trim segments of
        kind OTHER. The seqrecords without any uppercase segment are not
        included in the returned list.

        The processed packets, processed seqs and yielded seqs counters of
        self._stats are updated along the way.
        '''
        counters = self._stats
        counters[PROCESSED_PACKETS] += 1
        kept = []
        for seqrecord in seqrecords:
            counters[PROCESSED_SEQS] += 1
            unmasked = get_uppercase_segments(str(seqrecord.seq))
            longest = get_longest_segment(unmasked)
            if longest is None:
                # Nothing unmasked in this seqrecord, so it is left out.
                continue
            counters[YIELDED_SEQS] += 1
            last_pos = len(seqrecord) - 1
            # Inclusive (start, end) regions that flank the longest segment.
            flanks = []
            if longest[0] != 0:
                flanks.append((0, longest[0] - 1))
            if longest[1] != last_pos:
                flanks.append((longest[1] + 1, last_pos))

            _add_trim_segments(flanks, seqrecord, kind=OTHER)
            kept.append(seqrecord)
        return kept