def test_score_threshold_rule(kmer_alphabet, ref_kmer, threshold):
    """
    Check that :class:`ScoreThresholdRule` reports exactly those k-mers
    that an exhaustive scan over all k-mer codes finds.

    The expected set is built by scoring the reference k-mer against
    every k-mer code of the alphabet with an ungapped alignment and
    keeping the codes whose score is at least `threshold`.
    """
    matrix = align.SubstitutionMatrix.std_protein_matrix()

    def _code_to_sequence(kmer_code):
        # Decompose the k-mer code into its symbol codes and wrap them
        # in a sequence object that can be aligned
        sequence = seq.ProteinSequence()
        sequence.code = kmer_alphabet.split(kmer_code)
        return sequence

    reference = _code_to_sequence(ref_kmer)

    # Exhaustive scan: every k-mer code is scored against the reference
    expected_kmers = {
        candidate
        for candidate in range(len(kmer_alphabet))
        if align.align_ungapped(
            reference, _code_to_sequence(candidate), matrix, score_only=True
        )
        >= threshold
    }

    rule = align.ScoreThresholdRule(matrix, threshold)
    observed_kmers = set(rule.similar_kmers(kmer_alphabet, ref_kmer))

    assert observed_kmers == expected_kmers
def test_invalid_kmer(kmer_alphabet, invalid_kmer):
    """
    An invalid k-mer code passed to :meth:`similar_kmers()` must raise
    an :class:`AlphabetError`.
    """
    protein_matrix = align.SubstitutionMatrix.std_protein_matrix()
    rule = align.ScoreThresholdRule(protein_matrix, 0)

    with pytest.raises(seq.AlphabetError):
        rule.similar_kmers(kmer_alphabet, invalid_kmer)
def test_invalid_matrix(kmer_alphabet):
    """
    A rule built on the standard nucleotide substitution matrix must
    raise a :class:`ValueError` when it is queried with the given k-mer
    alphabet, as the two are expected to be incompatible.
    """
    nucleotide_matrix = align.SubstitutionMatrix.std_nucleotide_matrix()
    rule = align.ScoreThresholdRule(nucleotide_matrix, 0)

    with pytest.raises(ValueError):
        rule.similar_kmers(kmer_alphabet, 0)
def _identity_rule(alphabet):
    """
    Create a :class:`ScoreThresholdRule` with a threshold of ``0`` from
    a substitution matrix over `alphabet` that scores ``0`` for a symbol
    paired with itself and ``-1`` for every other symbol pair.
    """
    symbol_count = len(alphabet)
    # Identity matrix shifted down by one: 0 on the diagonal, -1 elsewhere
    scores = np.eye(symbol_count, dtype=int) - 1
    identity_matrix = align.SubstitutionMatrix(alphabet, alphabet, scores)
    return align.ScoreThresholdRule(identity_matrix, 0)