Пример #1
0
def test_score_threshold_rule(kmer_alphabet, ref_kmer, threshold):
    """
    Check that :class:`ScoreThresholdRule` reports exactly the same
    similar k-mers as an exhaustive search, which scores every k-mer
    code of the alphabet against the reference k-mer.
    """
    matrix = align.SubstitutionMatrix.std_protein_matrix()

    def _as_sequence(kmer_code):
        # Wrap the split k-mer code in a protein sequence,
        # so it can be passed to the ungapped alignment
        sequence = seq.ProteinSequence()
        sequence.code = kmer_alphabet.split(kmer_code)
        return sequence

    ref_sequence = _as_sequence(ref_kmer)

    # Brute force: keep each k-mer code whose ungapped alignment score
    # against the reference reaches the threshold
    expected_kmers = {
        kmer_code
        for kmer_code in range(len(kmer_alphabet))
        if align.align_ungapped(
            ref_sequence, _as_sequence(kmer_code), matrix, score_only=True
        ) >= threshold
    }

    rule = align.ScoreThresholdRule(matrix, threshold)
    observed_kmers = set(rule.similar_kmers(kmer_alphabet, ref_kmer))

    assert observed_kmers == expected_kmers
Пример #2
0
def test_invalid_kmer(kmer_alphabet, invalid_kmer):
    """
    Requesting similar k-mers for an invalid k-mer code must raise
    an :class:`AlphabetError`.
    """
    protein_matrix = align.SubstitutionMatrix.std_protein_matrix()
    rule = align.ScoreThresholdRule(protein_matrix, 0)

    with pytest.raises(seq.AlphabetError):
        rule.similar_kmers(kmer_alphabet, invalid_kmer)
Пример #3
0
def test_invalid_matrix(kmer_alphabet):
    """
    A rule built from a substitution matrix that is incompatible with
    the k-mer alphabet must raise a :class:`ValueError` when similar
    k-mers are requested.
    """
    # The standard nucleotide matrix serves as the incompatible matrix
    nucleotide_matrix = align.SubstitutionMatrix.std_nucleotide_matrix()
    rule = align.ScoreThresholdRule(nucleotide_matrix, 0)

    with pytest.raises(ValueError):
        rule.similar_kmers(kmer_alphabet, 0)
Пример #4
0
def _identity_rule(alphabet):
    """
    Create a :class:`ScoreThresholdRule` with a threshold of 0, based
    on a substitution matrix over `alphabet` that scores identical
    symbols with 0 and every other symbol pair with -1.
    """
    # NOTE(review): presumably only the k-mer itself reaches the
    # threshold under this scoring - confirm against ScoreThresholdRule
    n_symbols = len(alphabet)
    # Identity matrix shifted by -1: 0 on the diagonal, -1 elsewhere
    scores = np.eye(n_symbols, dtype=int) - 1
    identity_matrix = align.SubstitutionMatrix(alphabet, alphabet, scores)
    return align.ScoreThresholdRule(identity_matrix, 0)