def check_spelling(checked_word, dist, word_list):
    """
    Return the set of words in word_list that are within an edit distance
    dist of the string checked_word.

    For every candidate word an alignment matrix is computed with
    sa.compute_alignment_matrix, and the distance is taken as

        len(checked_word) + len(word) - matrix[len(checked_word)][len(word)]

    Arguments:
    checked_word -- the string every candidate is compared against
    dist         -- inclusive upper bound on the computed distance
    word_list    -- iterable of candidate words

    Returns a set holding each candidate whose distance is <= dist.
    A progress line is printed after every 1000 candidates.

    NOTE(review): the scoring matrix is built over the lowercase letters
    a-z only; how sa treats words containing other characters is not
    visible here -- confirm before feeding it mixed-case input.
    """

    # Scoring parameters handed to sa.build_scoring_matrix.
    # Presumably (diagonal, off-diagonal, dash) scores -- confirm against sa.
    diag_score, off_diag_score, dash_score = 2, 1, 0
    progress_interval = 1000

    # Generate the scoring matrix needed to compare the words
    alphabet = set("abcdefghijklmnopqrstuvwxyz")
    scoring_matrix = sa.build_scoring_matrix(alphabet, diag_score,
                                             off_diag_score, dash_score)

    checked_len = len(checked_word)
    words_found = set()

    # Iterate through the word list to find the words within distance
    for processed, word in enumerate(word_list, 1):
        word_len = len(word)
        alignment_matrix = sa.compute_alignment_matrix(checked_word, word, scoring_matrix)
        # The bottom-right cell holds the score of aligning the full words.
        score = alignment_matrix[checked_len][word_len]
        if checked_len + word_len - score <= dist:
            words_found.add(word)
        if processed % progress_interval == 0:
            # Single-argument print(...) emits the same text on Python 2 and 3.
            print(".......... %s words processed." % processed)

    return words_found
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    Build an un-normalized distribution of alignment scores between seq_x
    and random permutations of seq_y.

    For each of num_trials trials the characters of seq_y are shuffled,
    the alignment matrix of seq_x against the shuffled sequence is computed
    with sa.compute_alignment_matrix(..., False), and the largest entry of
    that matrix is recorded. (The trailing False presumably selects local
    alignment -- confirm against sa.)

    Arguments:
    seq_x          -- the fixed sequence
    seq_y          -- the sequence whose characters are permuted
    scoring_matrix -- scoring matrix passed through to sa
    num_trials     -- number of trials; values <= 0 perform no trials

    Returns a dictionary mapping each observed maximum score to the number
    of trials that produced it. A progress line is printed after every trial.
    """

    scoring_distribution = {}

    # Each trial reshuffles the previous permutation in place; this keeps
    # one working list instead of rebuilding it from the string every time.
    shuffled = list(seq_y)

    for trial in range(1, num_trials + 1):
        # Create a random permutation of the sequence seq_y
        random.shuffle(shuffled)
        rand_y = "".join(shuffled)

        # Compute the maximum score for the alignment of seq_x and rand_y.
        # Taking the true maximum avoids relying on a -1 starting sentinel.
        alignment_matrix = sa.compute_alignment_matrix(seq_x, rand_y, scoring_matrix, False)
        max_score = max(max(row) for row in alignment_matrix)

        # Count the scores generated in the dictionary scoring_distribution
        scoring_distribution[max_score] = scoring_distribution.get(max_score, 0) + 1

        # Single-argument print(...) emits the same text on Python 2 and 3.
        print(".......... Trial %s completed." % trial)

    return scoring_distribution
# ---------------------------------------------------------------------------
# Top-level script: local alignment of the human and fruitfly proteins.
# Written for Python 2 (print statements).
# NOTE(review): human_protein, rfs, sa, FRUITFLY_EYELESS_URL, PAM50_URL,
# HUMAN_RESULT and FRUITFLY_RESULT are not defined in this section; they are
# expected to be in scope already -- confirm against the top of the file.
# ---------------------------------------------------------------------------
print
    
# Read the FruitflyEyelessProtein and print it together with its length
fruitfly_protein = rfs.read_protein(FRUITFLY_EYELESS_URL)
print "Fruitfly protein:"
print fruitfly_protein
print "Length fruitfly protein =", len(fruitfly_protein)
print
    
# Read the PAM50 scoring matrix (the debug print below is disabled)
scoring_matrix = rfs.read_scoring_matrix(PAM50_URL)
#print_scoring_matrix(scoring_matrix)
#print
    
# Compute local alignment matrix for the two proteins
# (the trailing False presumably selects the local variant -- confirm in sa;
#  the debug print of the matrix below is disabled)
alignment_matrix = sa.compute_alignment_matrix(human_protein, fruitfly_protein, scoring_matrix, False)
#print_alignment_matrix(alignment_matrix, human_protein, fruitfly_protein)
#print

# Compute and print local alignment.
# As used below, local_alignment[0] is reported as the score, and
# local_alignment[1] / local_alignment[2] as the aligned human / fruitfly
# sequences.
local_alignment = sa.compute_local_alignment(human_protein, fruitfly_protein, scoring_matrix, alignment_matrix)
print
print "==================> Local alignment:"
print ".......... Local alignment score:    ", local_alignment[0]
print ".......... Human protein sequence:   ", local_alignment[1]
# Flag the result when it equals the expected reference HUMAN_RESULT
if local_alignment[1] == HUMAN_RESULT:
    print ">>>>>>>>>>>>>>>>>>>> THIS IS THE RIGHT SEQUENCE"
print ".......... Fruitfly protein sequence:", local_alignment[2]
# Flag the result when it equals the expected reference FRUITFLY_RESULT
if local_alignment[2] == FRUITFLY_RESULT:
    print ">>>>>>>>>>>>>>>>>>>> THIS IS THE RIGHT SEQUENCE"
# The length is taken from the aligned human sequence only
print ".......... Length aligned sequence:  ", len(local_alignment[1])
print "Consensus PAX domain protein:"
print consensus_pax
print "Length consensus PAX domain protein =", len(consensus_pax)
print

# Read the PAM50 scoring matrix and print the result
scoring_matrix = rfs.read_scoring_matrix(PAM50_URL)

# Computations for the human protein sequence
# . remove all '-' from the sequence
lst_of_strings = HUMAN_LOCAL.split("-")
human_local = ""
for substring in lst_of_strings:
    human_local += substring
# . compute the alignment between this sequence and the ConsensusPAXDomain sequence
alignment_matrix = sa.compute_alignment_matrix(human_local, consensus_pax, scoring_matrix)
global_alignment = sa.compute_global_alignment(human_local, consensus_pax, scoring_matrix, alignment_matrix)
# . determine percentage of agreement
agreements = 0
for index in range(len(global_alignment[1])):
    if global_alignment[1][index] == global_alignment[2][index]:
        agreements += 1
print ">>>>>>>>>>>>>>> Percentage of agreement human local    =", (
    float(agreements) / len(global_alignment[1])
) * 100, "%"

# Computations for the fruitfly protein sequence
# . remove all '-' from the sequence
lst_of_strings = FRUITFLY_LOCAL.split("-")
fruitfly_local = ""
for substring in lst_of_strings: