示例#1
0
def test_compare_prefixes():
    """Check ``compare_prefixes`` on literal and wildcard-containing input.

    Every expected value is the six-tuple returned by ``compare_prefixes``.
    Judging by the assertions below, the last two fields are the number of
    matching and mismatching positions in the compared prefix; the first
    four look like start/stop coordinates in the two sequences (confirm
    against the ``compare_prefixes`` documentation).
    """
    # Without wildcard flags, 'X' is an ordinary character: one mismatch.
    assert compare_prefixes('AAXAA', 'AAAAATTTTTTTTT') == (0, 5, 0, 5, 4, 1)
    # With wildcard_ref=True the 'N' in the first argument matches the 'C'.
    assert compare_prefixes('AANAA', 'AACAATTTTTTTTT', wildcard_ref=True) == (
        0, 5, 0, 5, 5, 0
    )
    # Mismatches at the first ('X' vs 'A') and last ('A' vs 'T') position.
    assert compare_prefixes('XAAAAA', 'AAAAATTTTTTTTT') == (0, 6, 0, 6, 4, 2)

    # Expected result whenever all ten compared positions match.
    full_match = (0, 10, 0, 10, 10, 0)

    # Every wildcard sequence must match the first one when wildcards are
    # enabled on whichever side holds the varying sequence.
    first = WILDCARD_SEQUENCES[0]
    for seq in WILDCARD_SEQUENCES:
        extended = seq + 'GCCAGGGTTGATTCGGCTGATCTGGCCG'
        result = compare_prefixes(first, extended, wildcard_query=True)
        assert result == full_match, result
        result = compare_prefixes(extended, first, wildcard_ref=True)
        assert result == full_match

    # A sequence is always a perfect prefix of itself plus a tail, with or
    # without wildcard handling.
    for seq in WILDCARD_SEQUENCES:
        extended = seq + 'GCCAGGG'
        result = compare_prefixes(seq, extended)
        assert result == full_match
        result = compare_prefixes(
            extended, seq, wildcard_ref=True, wildcard_query=True
        )
        assert result == full_match

    # The two 'X' characters are expected to count as mismatches under
    # every combination of the wildcard flags.
    extended = WILDCARD_SEQUENCES[0] + 'GCCAGG'
    for wildc_ref in (False, True):
        for wildc_query in (False, True):
            result = compare_prefixes(
                'CCCXTTXATC', extended,
                wildcard_ref=wildc_ref, wildcard_query=wildc_query
            )
            assert result == (0, 10, 0, 10, 8, 2)
示例#2
0
def test_compare_prefixes():
    """Exercise ``compare_prefixes`` with and without wildcard matching.

    The asserted six-tuples end with the match count and the mismatch
    count of the compared prefix (as the literal cases below show); the
    leading four values appear to be start/stop positions in the two
    inputs -- confirm against the ``compare_prefixes`` documentation.
    """
    # 'X' is not treated specially here, so it produces one mismatch.
    assert compare_prefixes('AAXAA', 'AAAAATTTTTTTTT') == (0, 5, 0, 5, 4, 1)
    # 'N' in the first argument matches any base once wildcard_ref is set.
    assert compare_prefixes(
        'AANAA', 'AACAATTTTTTTTT', wildcard_ref=True
    ) == (0, 5, 0, 5, 5, 0)
    # Two mismatches: the leading 'X' and the final 'A' against 'T'.
    assert compare_prefixes('XAAAAA', 'AAAAATTTTTTTTT') == (0, 6, 0, 6, 4, 2)

    # The first wildcard sequence must fully match every other one as long
    # as wildcards are honoured on the side that carries the variation.
    a = WILDCARD_SEQUENCES[0]
    for s in WILDCARD_SEQUENCES:
        r = s + 'GCCAGGGTTGATTCGGCTGATCTGGCCG'
        result = compare_prefixes(a, r, wildcard_query=True)
        assert result == (0, 10, 0, 10, 10, 0), result

        result = compare_prefixes(r, a, wildcard_ref=True)
        assert result == (0, 10, 0, 10, 10, 0)

    # Each sequence compared against itself (plus a tail) is a full match,
    # both literally and with wildcards enabled on both sides.
    for s in WILDCARD_SEQUENCES:
        r = s + 'GCCAGGG'
        result = compare_prefixes(s, r)
        assert result == (0, 10, 0, 10, 10, 0)

        result = compare_prefixes(
            r, s, wildcard_ref=True, wildcard_query=True
        )
        assert result == (0, 10, 0, 10, 10, 0)

    # The two 'X' positions are expected to mismatch for all four
    # combinations of the wildcard flags.
    r = WILDCARD_SEQUENCES[0] + 'GCCAGG'
    for wildc_ref in (False, True):
        for wildc_query in (False, True):
            result = compare_prefixes(
                'CCCXTTXATC', r,
                wildcard_ref=wildc_ref, wildcard_query=wildc_query
            )
            assert result == (0, 10, 0, 10, 8, 2)
示例#3
0
    def match_to(self, read):
        """Look for this adapter in ``read`` and describe where it was found.

        An exact string search is tried first (skipped when adapter
        wildcards are enabled); if that finds nothing, an approximate
        alignment is computed and checked against the configured limits.

        Args:
            read: A :class:`Sequence` instance; its ``sequence`` attribute is
                upper-cased before any comparison.

        Returns:
            A :class:`Match` instance when the adapter was located and the
            alignment satisfies the minimum overlap, the maximum error rate
            and (if ``max_rmp`` is set) the maximum random-match
            probability; None otherwise.
        """
        target = read.sequence.upper()

        # Exact search is only meaningful when the adapter has no wildcards.
        if not self.adapter_wildcards:
            if self.where == PREFIX:
                hit = 0 if target.startswith(self.sequence) else -1
            elif self.where == SUFFIX:
                if target.endswith(self.sequence):
                    hit = len(target) - len(self.sequence)
                else:
                    hit = -1
            else:
                hit = target.find(self.sequence)

            if hit >= 0:
                # The whole adapter matched verbatim: full length, no errors.
                adapter_len = len(self.sequence)
                return Match(
                    0, adapter_len, hit, hit + adapter_len, adapter_len, 0,
                    self._front_flag, self, read)

        # Approximate matching: the general aligner is used unless the
        # adapter is anchored and indels are disallowed, in which case a
        # direct prefix/suffix comparison is used instead.
        if self.indels or self.where not in (PREFIX, SUFFIX):
            alignment = self.aligner.locate(target)
            if self.debug:
                print(self.aligner.dpmatrix)  # pragma: no cover
        else:
            if self.where == PREFIX:
                compare = align.compare_prefixes
            else:
                compare = align.compare_suffixes
            alignment = compare(
                self.sequence,
                target,
                wildcard_ref=self.adapter_wildcards,
                wildcard_query=self.read_wildcards)

        if not alignment:
            return None

        astart, astop, rstart, rstop, matches, errors = alignment
        overlap = astop - astart
        if (overlap >= self.min_overlap
                and errors / overlap <= self.max_error_rate):
            # The random-match probability filter applies only when a
            # threshold has been configured.
            if (self.max_rmp is None
                    or self.match_probability(matches, overlap)
                    <= self.max_rmp):
                return Match(astart, astop, rstart, rstop, matches, errors,
                             self._front_flag, self, read)

        return None
示例#4
0
    def match_to(self, read):
        """
        Attempt to match this adapter to the given read.

        An exact string search is tried first unless adapter wildcards are
        enabled. If it finds nothing, an approximate alignment is computed
        and accepted only if it meets the matching criteria.

        Args:
            read: The read to search; its ``sequence`` attribute is
                upper-cased before comparison.

        Returns:
            A Match instance if a match was found; None if no match was
            found given the matching criteria (minimum overlap length,
            maximum error rate and, when both ``match_probability`` and
            ``max_rmp`` are set, maximum random-match probability).
        """
        read_seq = read.sequence.upper()

        # try to find an exact match first unless wildcards are allowed
        pos = -1
        if not self.adapter_wildcards:
            if self.where == PREFIX:
                pos = 0 if read_seq.startswith(self.sequence) else -1
            elif self.where == SUFFIX:
                if read_seq.endswith(self.sequence):
                    pos = len(read_seq) - len(self.sequence)
                else:
                    pos = -1
            else:
                pos = read_seq.find(self.sequence)

        if pos >= 0:
            # Verbatim hit: the whole adapter matched with zero errors.
            seqlen = len(self.sequence)
            return Match(
                0, seqlen, pos, pos + seqlen, seqlen, 0,
                self._front_flag, self, read)

        # try approximate matching
        if not self.indels and self.where in (PREFIX, SUFFIX):
            # Anchored adapter without indels: a direct position-by-position
            # comparison is used instead of the general aligner.
            if self.where == PREFIX:
                alignment = align.compare_prefixes(
                    self.sequence, read_seq,
                    wildcard_ref=self.adapter_wildcards,
                    wildcard_query=self.read_wildcards)
            else:
                alignment = align.compare_suffixes(
                    self.sequence, read_seq,
                    wildcard_ref=self.adapter_wildcards,
                    wildcard_query=self.read_wildcards)
        else:
            alignment = self.aligner.locate(read_seq)
            if self.debug:
                print(self.aligner.dpmatrix)  # pragma: no cover

        if alignment:
            astart, astop, rstart, rstop, matches, errors = alignment
            size = astop - astart
            if size >= self.min_overlap and errors / size <= self.max_error_rate:
                # The random-match filter needs both a probability model and
                # a threshold. Without a threshold there is nothing to
                # compare against (and ``<= None`` would raise TypeError),
                # so the alignment is accepted as-is.
                if (self.match_probability is None
                        or self.max_rmp is None
                        or self.match_probability(matches, size)
                        <= self.max_rmp):
                    return Match(
                        astart, astop, rstart, rstop, matches, errors,
                        self._front_flag, self, read)

        return None