Пример #1
0
    def __call__(self, seqs):
        """It splits a list of sequences with the provided linkers.

        The sequences are written in fasta format with write_seqs and the
        resulting file is given, together with self.linkers, to
        BlasterForFewSubjects (blastn with the blastn-short task). The
        filters ask for matches of at least 13 residues and a minimum
        identity of 87.

        seqs is looped over here after having been handed to write_seqs, so
        it should be a list and not a one-shot iterator.

        It returns a new list: the sequences without matched segments are
        kept untouched and the ones with matched segments are replaced by
        whatever self._split_by_mate_linker returns for them.
        """
        seq_fhand = write_seqs(seqs, file_format='fasta')
        # The matcher only gets the file name, so the buffered content is
        # flushed before the name is handed over.
        seq_fhand.flush()

        min_identity = 87.0
        min_len = 13
        filters = [{'kind': 'min_length', 'min_num_residues': min_len,
                    'length_in_query': False, 'filter_match_parts': True},
                   {'kind': 'score_threshold', 'score_key': 'identity',
                    'min_score': min_identity}]

        matcher = BlasterForFewSubjects(seq_fhand.name, self.linkers,
                                        program='blastn', filters=filters,
                                        params={'task': 'blastn-short'},
                                        elongate_for_global=True,
                                        seqs_type=NUCL)
        new_seqs = []
        for seq in seqs:
            segments = matcher.get_matched_segments_for_read(get_name(seq))
            if segments is None:
                # No linker was found in this read, it is kept as it is.
                new_seqs.append(seq)
            else:
                # extend avoids an inner loop that would rebind ``seq``.
                new_seqs.extend(self._split_by_mate_linker(seq, segments))
        return new_seqs
Пример #2
0
    def __call__(self, seqs):
        """It splits a list of sequences with the provided linkers.

        A fasta file with the given sequences is created by write_seqs and
        self.linkers are matched against it by BlasterForFewSubjects using
        blastn with the blastn-short task. Only the matches that pass the
        minimum length (13 residues) and the minimum identity (87) filters
        are requested.

        seqs is traversed in this method after being passed to write_seqs,
        so a list (not a generator) is expected.

        The returned value is a new list in which every sequence with
        matched segments has been replaced by the result of
        self._split_by_mate_linker. The other sequences are not modified.
        """
        seq_fhand = write_seqs(seqs, file_format='fasta')
        # Only seq_fhand.name is passed on, so pending writes are flushed
        # first.
        seq_fhand.flush()

        min_identity = 87.0
        min_len = 13
        filters = [{
            'kind': 'min_length',
            'min_num_residues': min_len,
            'length_in_query': False,
            'filter_match_parts': True
        }, {
            'kind': 'score_threshold',
            'score_key': 'identity',
            'min_score': min_identity
        }]

        matcher = BlasterForFewSubjects(seq_fhand.name,
                                        self.linkers,
                                        program='blastn',
                                        filters=filters,
                                        params={'task': 'blastn-short'},
                                        elongate_for_global=True,
                                        seqs_type=NUCL)
        new_seqs = []
        for seq in seqs:
            segments = matcher.get_matched_segments_for_read(get_name(seq))
            if segments is not None:
                pieces = self._split_by_mate_linker(seq, segments)
            else:
                pieces = [seq]
            # A single extend replaces the inner loop that reused the name
            # of the outer loop variable.
            new_seqs.extend(pieces)
        return new_seqs
Пример #3
0
 def _pre_trim(self, trim_packet):
     'It prepares the blast matcher for the seqs found in the packet'
     # trim_packet[SEQS_PASSED] holds groups of seqs; they are flattened
     # into a single list before being written.
     reads = []
     for group in trim_packet[SEQS_PASSED]:
         reads.extend(group)

     reads_fhand = write_seqs(reads, file_format='fasta')
     reads_fhand.flush()

     identity_filter = {'kind': 'score_threshold', 'score_key': 'identity',
                        'min_score': 87}
     length_filter = {'kind': 'min_length', 'min_num_residues': 13,
                      'length_in_query': False}
     blast_params = {'task': 'blastn-short', 'expect': '0.0001'}

     self._matcher = BlasterForFewSubjects(reads_fhand.name, self.oligos,
                                           program='blastn',
                                           filters=[identity_filter,
                                                    length_filter],
                                           params=blast_params,
                                           elongate_for_global=True)
Пример #4
0
    def test_matching_segments(self):
        'The segment matched in seq1 should be the expected linker region'
        mate_fhand = create_a_matepair_file()
        linkers = assing_kind_to_seqs(SEQRECORD, LINKERS, None)

        matcher = BlasterForFewSubjects(mate_fhand.name, linkers,
                                        program='blastn',
                                        elongate_for_global=True)
        matched = matcher.get_matched_segments_for_read('seq1')
        linker_region = matched[0]

        # The expected match starts right after seq_5 and ends at the last
        # position of seq_5 followed by TITANIUM_LINKER.
        seq_5 = 'CTAGTCTAGTCGTAGTCATGGCTGTAGTCTAGTCTACGATTCGTATCAGTTGTGTGAC'
        first_pos = len(seq_5)
        last_pos = len(seq_5 + TITANIUM_LINKER) - 1
        assert [(first_pos, last_pos)] == linker_region
Пример #5
0
    def test_matching_segments(self):
        'The segment matched in seq1 should be the expected linker region'
        mate_fhand = create_a_matepair_file()

        # The two linkers are given as fasta formatted SeqItems.
        titan = SeqItem('titan', ['>titan\n', TITANIUM_LINKER + '\n'])
        flx = SeqItem('flx', ['>flx\n', FLX_LINKER + '\n'])
        linkers = assing_kind_to_seqs(SEQITEM, [titan, flx], 'fasta')

        matcher = BlasterForFewSubjects(mate_fhand.name, linkers,
                                        program='blastn',
                                        elongate_for_global=True)
        matched = matcher.get_matched_segments_for_read('seq1')
        linker_region = matched[0]

        # The expected match starts right after seq_5 and ends at the last
        # position of seq_5 followed by TITANIUM_LINKER.
        seq_5 = 'CTAGTCTAGTCGTAGTCATGGCTGTAGTCTAGTCTACGATTCGTATCAGTTGTGTGAC'
        first_pos = len(seq_5)
        last_pos = len(seq_5 + TITANIUM_LINKER) - 1
        assert [(first_pos, last_pos)] == linker_region
Пример #6
0
class TrimWithBlastShort(_BaseTrim):
    'Trimmer that looks for the given oligos with the blastn-short task'

    def __init__(self, oligos):
        'It keeps the oligos and initializes the base trimmer'
        self.oligos = oligos
        super(TrimWithBlastShort, self).__init__()

    def _pre_trim(self, trim_packet):
        'It creates the matcher for the seqs found in the trim packet'
        # The groups stored under SEQS_PASSED are flattened into one list.
        reads = []
        for group in trim_packet[SEQS_PASSED]:
            reads.extend(group)

        reads_fhand = write_seqs(reads, file_format='fasta')
        reads_fhand.flush()

        identity_filter = {'kind': 'score_threshold',
                           'score_key': 'identity',
                           'min_score': 87}
        length_filter = {'kind': 'min_length',
                         'min_num_residues': 13,
                         'length_in_query': False}
        blast_params = {'task': 'blastn-short', 'expect': '0.0001'}

        self._matcher = BlasterForFewSubjects(
            reads_fhand.name, self.oligos, program='blastn',
            filters=[identity_filter, length_filter], params=blast_params,
            elongate_for_global=True)

    def _do_trim(self, seq):
        'It adds VECTOR trim segments to the seq when there are matches'
        matched = self._matcher.get_matched_segments_for_read(get_name(seq))
        if matched is None:
            return seq
        _add_trim_segments(matched[0], seq, kind=VECTOR)
        return seq
Пример #7
0
class FilterBlastShort(_BaseFilter):
    """It filters a seq if there is a match against the given oligos.

    oligos are the sequences handed to BlasterForFewSubjects. reverse and
    failed_drags_pair are passed untouched to _BaseFilter.
    """

    def __init__(self, oligos, failed_drags_pair=True, reverse=False):
        'It stores the oligos and initializes the base filter'
        self.oligos = oligos
        super(FilterBlastShort, self).__init__(
            reverse=reverse, failed_drags_pair=failed_drags_pair)

    def _setup_checks(self, filterpacket):
        'It builds the blast matcher for the seqs found in the packet'
        seqs = [s for seqs in filterpacket[SEQS_PASSED] for s in seqs]

        # we create a blastdb for these reads and then we use the oligos
        # as the blast query
        db_fhand = write_seqs(seqs, file_format='fasta')
        # Only the file name is passed on, so the content is flushed first.
        db_fhand.flush()
        params = {'task': 'blastn-short', 'expect': '0.0001'}
        filters = [{'kind': 'score_threshold', 'score_key': 'identity',
                    'min_score': 87},
                   {'kind': 'min_length', 'min_num_residues': 13,
                    'length_in_query': False}]
        self._matcher = BlasterForFewSubjects(db_fhand.name, self.oligos,
                                              program='blastn',
                                              filters=filters,
                                              params=params,
                                              elongate_for_global=False)

    def _do_check(self, seq):
        'It returns True when no segment of the seq matches the oligos'
        segments = self._matcher.get_matched_segments_for_read(get_name(seq))
        # The identity test already yields a bool.
        return segments is None
Пример #8
0
class TrimWithBlastShort(_BaseTrim):
    "Trimmer that looks for the given oligos with the blastn-short task"

    def __init__(self, oligos):
        "It keeps the oligos and initializes the base trimmer"
        self.oligos = oligos
        super(TrimWithBlastShort, self).__init__()

    def _pre_trim(self, trim_packet):
        "It creates the matcher for the seqs found in the trim packet"
        # The groups stored under SEQS_PASSED are flattened into one list.
        reads = []
        for group in trim_packet[SEQS_PASSED]:
            reads.extend(group)

        reads_fhand = write_seqs(reads, file_format="fasta")
        reads_fhand.flush()

        identity_filter = {
            "kind": "score_threshold",
            "score_key": "identity",
            "min_score": 87,
        }
        length_filter = {
            "kind": "min_length",
            "min_num_residues": 13,
            "length_in_query": False,
        }
        self._matcher = BlasterForFewSubjects(
            reads_fhand.name,
            self.oligos,
            program="blastn",
            filters=[identity_filter, length_filter],
            params={"task": "blastn-short", "expect": "0.0001"},
            elongate_for_global=True,
        )

    def _do_trim(self, seq):
        "It adds VECTOR trim segments to the seq when there are matches"
        matched = self._matcher.get_matched_segments_for_read(get_name(seq))
        if matched is None:
            return seq
        _add_trim_segments(matched[0], seq, kind=VECTOR)
        return seq
Пример #9
0
class FilterBlastShort(_BaseFilter):
    """It filters a seq if there is a match against the given oligos.

    The oligos are kept in self.oligos and given to BlasterForFewSubjects
    when the checks are set up. failed_drags_pair and reverse are only
    forwarded to _BaseFilter.
    """

    def __init__(self, oligos, failed_drags_pair=True, reverse=False):
        'The initiator'
        self.oligos = oligos
        super(FilterBlastShort, self).__init__(
            reverse=reverse,
            failed_drags_pair=failed_drags_pair)

    def _setup_checks(self, filterpacket):
        'It creates self._matcher with the seqs of the filter packet'
        seqs = [s for seqs in filterpacket[SEQS_PASSED] for s in seqs]

        # we create a blastdb for these reads and then we use the oligos
        # as the blast query
        db_fhand = write_seqs(seqs, file_format='fasta')
        # The matcher gets the file by name, so pending writes are flushed.
        db_fhand.flush()
        params = {'task': 'blastn-short', 'expect': '0.0001'}
        filters = [{'kind': 'score_threshold', 'score_key': 'identity',
                    'min_score': 87},
                   {'kind': 'min_length', 'min_num_residues': 13,
                    'length_in_query': False}]
        self._matcher = BlasterForFewSubjects(db_fhand.name, self.oligos,
                                              program='blastn',
                                              filters=filters,
                                              params=params,
                                              elongate_for_global=False)

    def _do_check(self, seq):
        'It is True only for the seqs without matched segments'
        segments = self._matcher.get_matched_segments_for_read(get_name(seq))
        # ``is None`` is already a bool, no conditional expression needed.
        return segments is None
Пример #10
0
class TrimWithBlastShort(_BaseTrim):
    'Adaptor trimmer based on blastn-short matches of the given oligos'
    def __init__(self, oligos):
        'It keeps the oligos and initializes the base trimmer'
        self.oligos = oligos
        super(TrimWithBlastShort, self).__init__()

    def _pre_trim(self, trim_packet):
        'It creates the matcher for the seqs found in the trim packet'
        packet_seqs = [read
                       for group in trim_packet[SEQS_PASSED]
                       for read in group]
        fasta_fhand = write_seqs(packet_seqs, file_format='fasta')
        fasta_fhand.flush()

        by_identity = {'kind': 'score_threshold', 'score_key': 'identity',
                       'min_score': 87}
        by_length = {'kind': 'min_length', 'min_num_residues': 13,
                     'length_in_query': False}
        matcher = BlasterForFewSubjects(fasta_fhand.name, self.oligos,
                                        program='blastn',
                                        filters=[by_identity, by_length],
                                        params={'task': 'blastn-short',
                                                'expect': '0.0001'},
                                        elongate_for_global=True)
        self._matcher = matcher

    def _do_trim(self, seq):
        'It adds VECTOR trim segments to the seq when there are matches'
        read_name = get_name(seq)
        found = self._matcher.get_matched_segments_for_read(read_name)
        if found is not None:
            first_group = found[0]
            _add_trim_segments(first_group, seq, kind=VECTOR)
        return seq
Пример #11
0
    def _setup_checks(self, filterpacket):
        'It creates self._matcher with the seqs of the filter packet'
        # The groups stored under SEQS_PASSED are flattened into one list.
        reads = []
        for group in filterpacket[SEQS_PASSED]:
            reads.extend(group)

        # The reads are written to a file that will be the blast db, the
        # oligos will be the blast query.
        reads_fhand = write_seqs(reads, file_format='fasta')
        reads_fhand.flush()

        identity_filter = {'kind': 'score_threshold', 'score_key': 'identity',
                           'min_score': 87}
        length_filter = {'kind': 'min_length', 'min_num_residues': 13,
                         'length_in_query': False}
        blast_params = {'task': 'blastn-short', 'expect': '0.0001'}

        self._matcher = BlasterForFewSubjects(
            reads_fhand.name, self.oligos, program='blastn',
            filters=[identity_filter, length_filter], params=blast_params,
            elongate_for_global=False)
Пример #12
0
class TrimNexteraAdapters(_BaseTrim):
    """It trims from Nextera adaptors found with blast short algorithm to 3'end

    If adapter is at one end and it is not complete, it trims more bases
    """
    def __init__(self, oligos):
        'The initiator'
        self.oligos = oligos
        super(TrimNexteraAdapters, self).__init__()

    def _pre_trim(self, trim_packet):
        'It creates the matcher for the seqs found in the trim packet'
        seqs = [s for seqs in trim_packet[SEQS_PASSED] for s in seqs]
        db_fhand = write_seqs(seqs, file_format='fasta')
        # Only the file name is passed on, so the content is flushed first.
        db_fhand.flush()
        params = {'task': 'blastn-short', 'expect': '0.0001'}
        filters = [{'kind': 'score_threshold', 'score_key': 'identity',
                    'min_score': 87},
                   {'kind': 'min_length', 'min_num_residues': 13,
                    'length_in_query': False}]
        self._matcher = BlasterForFewSubjects(db_fhand.name, self.oligos,
                                              program='blastn',
                                              filters=filters,
                                              params=params,
                                              elongate_for_global=True)

    def _do_trim(self, seq):
        """It adds trim segments from every adaptor match to the seq end.

        Every matched segment is stretched so that it finishes in the last
        position of the seq and it is added with the kind OTHER. The seq
        is returned, also when nothing matched.
        """
        segments = self._matcher.get_matched_segments_for_read(get_name(seq))
        if segments is not None:
            # The last position is the same for every segment, so it is
            # calculated only once.
            last_index = get_length(seq) - 1
            segments = [(segment[0], last_index) for segment in segments[0]]
            _add_trim_segments(segments, seq, kind=OTHER)
        return seq
Пример #13
0
 def __call__(self, seqs):
     'It adds VECTOR trim segments to the seqs that match the oligos'
     reads_fhand = write_seqs(seqs, file_format='fasta')
     reads_fhand.flush()

     identity_filter = {'kind': 'score_threshold', 'score_key': 'identity',
                        'min_score': 89}
     length_filter = {'kind': 'min_length', 'min_num_residues': 13,
                      'length_in_query': False}
     blast_params = {'task': 'blastn-short', 'expect': '0.0001'}
     matcher = BlasterForFewSubjects(reads_fhand.name, self.oligos,
                                     program='blastn',
                                     filters=[identity_filter,
                                              length_filter],
                                     params=blast_params,
                                     elongate_for_global=True)

     for seq in seqs:
         matched = matcher.get_matched_segments_for_read(get_name(seq))
         if matched is None:
             # nothing to trim in this seq
             continue
         _add_trim_segments(matched[0], seq, kind=VECTOR)
     # The very same seqs that were given are returned.
     return seqs
Пример #14
0
    def test_matching_segments(self):
        'The segment matched in seq1 should be the expected linker region'
        mate_fhand = create_a_matepair_file()

        # The two linkers are given as fasta formatted SeqItems.
        titan = SeqItem('titan', ['>titan\n', TITANIUM_LINKER + '\n'])
        flx = SeqItem('flx', ['>flx\n', FLX_LINKER + '\n'])
        linkers = assing_kind_to_seqs(SEQITEM, [titan, flx], 'fasta')

        matcher = BlasterForFewSubjects(mate_fhand.name, linkers,
                                        program='blastn',
                                        elongate_for_global=True)
        linker_region = matcher.get_matched_segments_for_read('seq1')[0]

        # The expected match starts right after seq_5 and ends at the last
        # position of seq_5 followed by TITANIUM_LINKER.
        seq_5 = 'CTAGTCTAGTCGTAGTCATGGCTGTAGTCTAGTCTACGATTCGTATCAGTTGTGTGAC'
        region_start = len(seq_5)
        region_end = len(seq_5 + TITANIUM_LINKER) - 1
        expected_regions = [(region_start, region_end)]
        assert expected_regions == linker_region
Пример #15
0
 def _pre_trim(self, trim_packet):
     "It creates the matcher for the seqs found in the trim packet"
     # The groups stored under SEQS_PASSED are flattened into one list.
     reads = []
     for group in trim_packet[SEQS_PASSED]:
         reads.extend(group)

     reads_fhand = write_seqs(reads, file_format="fasta")
     reads_fhand.flush()

     identity_filter = {
         "kind": "score_threshold",
         "score_key": "identity",
         "min_score": 87,
     }
     length_filter = {
         "kind": "min_length",
         "min_num_residues": 13,
         "length_in_query": False,
     }
     self._matcher = BlasterForFewSubjects(
         reads_fhand.name,
         self.oligos,
         program="blastn",
         filters=[identity_filter, length_filter],
         params={"task": "blastn-short", "expect": "0.0001"},
         elongate_for_global=True,
     )
Пример #16
0
 def _pre_trim(self, trim_packet):
     'It creates the matcher for the seqs found in the trim packet'
     packet_seqs = [read
                    for group in trim_packet[SEQS_PASSED]
                    for read in group]
     fasta_fhand = write_seqs(packet_seqs, file_format='fasta')
     fasta_fhand.flush()

     by_identity = {'kind': 'score_threshold', 'score_key': 'identity',
                    'min_score': 87}
     by_length = {'kind': 'min_length', 'min_num_residues': 13,
                  'length_in_query': False}
     matcher = BlasterForFewSubjects(fasta_fhand.name, self.oligos,
                                     program='blastn',
                                     filters=[by_identity, by_length],
                                     params={'task': 'blastn-short',
                                             'expect': '0.0001'},
                                     elongate_for_global=True)
     self._matcher = matcher
Пример #17
0
    def _setup_checks(self, filterpacket):
        'It prepares the blast matcher used to check every seq'
        packet_seqs = [read
                       for group in filterpacket[SEQS_PASSED]
                       for read in group]

        # A blast db is built with these reads; the oligos act as the
        # blast query.
        fasta_fhand = write_seqs(packet_seqs, file_format='fasta')
        fasta_fhand.flush()

        by_identity = {'kind': 'score_threshold', 'score_key': 'identity',
                       'min_score': 87}
        by_length = {'kind': 'min_length', 'min_num_residues': 13,
                     'length_in_query': False}
        matcher = BlasterForFewSubjects(fasta_fhand.name, self.oligos,
                                        program='blastn',
                                        filters=[by_identity, by_length],
                                        params={'task': 'blastn-short',
                                                'expect': '0.0001'},
                                        elongate_for_global=False)
        self._matcher = matcher
Пример #18
0
class TrimNexteraAdapters(_BaseTrim):
    """It trims from Nextera adaptors found with blast short algorithm to 3'end

    If adapter is at one end and it is not complete, it trims more bases
    """

    def __init__(self, oligos):
        'The initiator'
        self.oligos = oligos
        super(TrimNexteraAdapters, self).__init__()

    def _pre_trim(self, trim_packet):
        'It builds self._matcher with the seqs of the trim packet'
        seqs = [s for seqs in trim_packet[SEQS_PASSED] for s in seqs]
        db_fhand = write_seqs(seqs, file_format='fasta')
        # The matcher gets the file by name, so pending writes are flushed.
        db_fhand.flush()
        params = {'task': 'blastn-short', 'expect': '0.0001'}
        filters = [{
            'kind': 'score_threshold',
            'score_key': 'identity',
            'min_score': 87
        }, {
            'kind': 'min_length',
            'min_num_residues': 13,
            'length_in_query': False
        }]
        self._matcher = BlasterForFewSubjects(db_fhand.name,
                                              self.oligos,
                                              program='blastn',
                                              filters=filters,
                                              params=params,
                                              elongate_for_global=True)

    def _do_trim(self, seq):
        """It marks for trimming from each adaptor match to the seq end.

        The start of every matched segment is kept and its end is replaced
        by the last position of the seq. The resulting segments are added
        with the kind OTHER. The seq is always returned.
        """
        segments = self._matcher.get_matched_segments_for_read(get_name(seq))
        if segments is not None:
            # Loop invariant: the end is shared by all the new segments.
            seq_end = get_length(seq) - 1
            segments = [(segment[0], seq_end) for segment in segments[0]]
            _add_trim_segments(segments, seq, kind=OTHER)
        return seq