Exemplo n.º 1
0
def question_two():
    """
    Globally align the dash-free human and fruit fly local alignments against
    the consensus PAX sequence and measure how often aligned positions agree.

    Elements 1 and 2 of the value returned by question_one() are treated as
    the human and fruit fly local alignment strings.

    Returns a two-element list [human, fly]. Each entry is the fraction
    (between 0.0 and 1.0, not multiplied by 100) of positions at which the
    globally aligned sequence and the globally aligned consensus hold the
    same character.
    """
    scoring_matrix = read_scoring_matrix(PAM50_URL)
    local_results = question_one()
    pax_seq = read_protein(CONSENSUS_PAX_URL)
    gap = "-"

    def agreement(alignment):
        # Fraction of positions where element 1 and element 2 of the
        # alignment tuple carry the same character.
        aligned_seq, aligned_pax = alignment[1], alignment[2]
        hits = sum(1 for position, pax_char in enumerate(aligned_pax)
                   if aligned_seq[position] == pax_char)
        return hits / float(len(aligned_pax))

    # Strip the gap characters from the human local alignment.
    stripped_human = "".join(
        residue for residue in local_results[1] if residue != gap)
    print("Old human seq: " + local_results[1])
    print("New human seq: " + stripped_human)

    # Strip the gap characters from the fruit fly local alignment.
    stripped_fly = "".join(
        residue for residue in local_results[2] if residue != gap)
    print("Old fly seq: " + local_results[2])
    print("New fly seq: " + stripped_fly)

    # Global alignment of each stripped sequence against the consensus.
    print("Computing alignment matrices and global alignments...")
    human_table = prj4.compute_alignment_matrix(stripped_human, pax_seq,
                                                scoring_matrix, True)
    human_vs_pax = prj4.compute_global_alignment(stripped_human, pax_seq,
                                                 scoring_matrix, human_table)
    fly_table = prj4.compute_alignment_matrix(stripped_fly, pax_seq,
                                              scoring_matrix, True)
    fly_vs_pax = prj4.compute_global_alignment(stripped_fly, pax_seq,
                                               scoring_matrix, fly_table)

    # Human fraction first, fly fraction second.
    return [agreement(human_vs_pax), agreement(fly_vs_pax)]
Exemplo n.º 2
0
def check_spelling(checked_word, dist, word_list):
    '''
    Collect every word of word_list whose edit distance to checked_word is
    at most dist.

    The edit distance of two words is computed as
    len(checked_word) + len(word) - score, where score is the global
    alignment score under a scoring matrix over the lowercase ASCII letters
    built with diagonal score 2, off-diagonal score 1 and dash score 0.

    Returns a list of the qualifying words, in the order they appear in
    word_list.
    '''
    diag_score, off_diag_score, dash_score = 2, 1, 0
    letters = set(string.ascii_lowercase)
    scoring_matrix = student.build_scoring_matrix(letters, diag_score,
                                                  off_diag_score, dash_score)

    def distance_to(candidate):
        # Align checked_word against candidate and turn the alignment score
        # into an edit distance.
        table = student.compute_alignment_matrix(checked_word, candidate,
                                                 scoring_matrix, True)
        score, _, _ = student.compute_global_alignment(checked_word, candidate,
                                                       scoring_matrix, table)
        return len(checked_word) + len(candidate) - score

    return [candidate for candidate in word_list
            if distance_to(candidate) <= dist]
Exemplo n.º 3
0
def _strip_gaps(sequence, gap="-"):
    """Return sequence with every occurrence of the gap character removed."""
    return "".join(char for char in sequence if char != gap)


def _match_fraction(alignment):
    """
    Return the fraction of positions at which alignment[1] and alignment[2]
    hold the same character, or 0.0 when the alignment is empty.
    """
    aligned_seq, aligned_pax = alignment[1], alignment[2]
    if not aligned_pax:
        # An empty alignment has no positions to compare; avoid dividing by 0.
        return 0.0
    matches = sum(1 for index, pax_char in enumerate(aligned_pax)
                  if aligned_seq[index] == pax_char)
    return matches / float(len(aligned_pax))


def question_two():
    """
    Compare the human and fruit fly local alignments with the consensus PAX
    sequence.

    Elements 1 and 2 of the value returned by question_one() are treated as
    the human and fruit fly local alignment strings. Their dashes are
    removed, each is globally aligned against the consensus sequence, and the
    share of agreeing positions is measured.

    Returns a two-element list [human, fly] of fractions between 0.0 and 1.0
    (not multiplied by 100).

    The print calls take a single parenthesised argument so that they behave
    the same under Python 2 and Python 3.
    """
    scoring_matrix = read_scoring_matrix(PAM50_URL)
    local_results = question_one()
    pax_seq = read_protein(CONSENSUS_PAX_URL)

    # Remove dashes from the human and fruit fly sequences.
    new_human = _strip_gaps(local_results[1])
    print("Old human seq: " + local_results[1])
    print("New human seq: " + new_human)
    new_fly = _strip_gaps(local_results[2])
    print("Old fly seq: " + local_results[2])
    print("New fly seq: " + new_fly)

    # Compute alignment matrices and global alignments against the consensus.
    print("Computing alignment matrices and global alignments...")
    align_matrix = prj4.compute_alignment_matrix(new_human, pax_seq, scoring_matrix, True)
    result_human_comp = prj4.compute_global_alignment(new_human, pax_seq, scoring_matrix, align_matrix)
    align_matrix = prj4.compute_alignment_matrix(new_fly, pax_seq, scoring_matrix, True)
    result_fly_comp = prj4.compute_global_alignment(new_fly, pax_seq, scoring_matrix, align_matrix)

    # Fraction of matching positions: human first, fruit fly second.
    return [_match_fraction(result_human_comp), _match_fraction(result_fly_comp)]
Exemplo n.º 4
0
def answer_Q7():
    """
    Return the (diag_score, off_diag_score, dash_score) triple (2, 1, 0).

    Before returning, the function builds a scoring matrix over the alphabet
    A, C, T, G from those three scores, globally aligns 'AA' with 'TAAT' and
    derives len(x) + len(y) - score. That value is computed only as a check
    and is not part of the return value.
    """
    scores = (2, 1, 0)
    diag, off_diag, dash = scores
    first, second = 'AA', 'TAAT'

    matrix = student.build_scoring_matrix(set('ACTG'), diag, off_diag, dash)
    table = student.compute_alignment_matrix(first, second, matrix, True)
    score, align_x, align_y = student.compute_global_alignment(
        first, second, matrix, table)

    # Edit distance implied by the alignment score; kept as a check only.
    edit_distance = len(first) + len(second) - score

    return scores
Exemplo n.º 5
0
def percent_match(local_alignment):
    '''
    Return the percentage of positions at which a local alignment, once
    globally re-aligned against the consensus PAX sequence, agrees with it.

    The dashes are removed from local_alignment, the result is globally
    aligned with the consensus sequence read from CONSENSUS_PAX_URL using
    the scoring matrix read from PAM50_URL, and the matching positions are
    counted. The result is 100 * matches / alignment length, rounded to two
    decimal places.
    '''
    ungapped = local_alignment.replace('-', '')
    scoring = provided.read_scoring_matrix(PAM50_URL)
    consensus = provided.read_protein(CONSENSUS_PAX_URL)

    # Table first, then the alignment traced from it.
    table = student.compute_alignment_matrix(ungapped, consensus, scoring, True)
    _, aligned_local, aligned_consensus = student.compute_global_alignment(
        ungapped, consensus, scoring, table)

    # Count positions where both aligned strings carry the same character.
    agreeing = sum(1 for position, residue in enumerate(aligned_local)
                   if residue == aligned_consensus[position])
    return round(agreeing / float(len(aligned_local)) * 100, 2)
	print (Project_4.compute_alignment_matrix('', '', {'A': {'A': 6, 'C': 2, '-': -4, 'T': 2, 'G': 2},
		'C': {'A': 2, 'C': 6, '-': -4, 'T': 2, 'G': 2}, '-': {'A': -4, 'C': -4, '-': -4, 'T': -4, 'G': -4},
		'T': {'A': 2, 'C': 2, '-': -4, 'T': 6, 'G': 2}, 'G': {'A': 2, 'C': 2, '-': -4, 'T': 2, 'G': 6}}, True))
	#expected [[0]] but received []
	print (Project_4.compute_alignment_matrix('A', 'A', {'A': {'A': 6, 'C': 2, '-': -4, 'T': 2, 'G': 2},
		'C': {'A': 2, 'C': 6, '-': -4, 'T': 2, 'G': 2}, '-': {'A': -4, 'C': -4, '-': -4, 'T': -4, 'G': -4},
		'T': {'A': 2, 'C': 2, '-': -4, 'T': 6, 'G': 2}, 'G': {'A': 2, 'C': 2, '-': -4, 'T': 2, 'G': 6}}, True))
	#expected [[0, -4], [-4, 6]]
	print (Project_4.compute_alignment_matrix('ATG', 'ACG', {'A': {'A': 6, 'C': 2, '-': -4, 'T': 2, 'G': 2},
		'C': {'A': 2, 'C': 6, '-': -4, 'T': 2, 'G': 2}, '-': {'A': -4, 'C': -4, '-': -4, 'T': -4, 'G': -4},
		'T': {'A': 2, 'C': 2, '-': -4, 'T': 6, 'G': 2}, 'G': {'A': 2, 'C': 2, '-': -4, 'T': 2, 'G': 6}}, True))
	#expected [[0, -4, -8, -12], [-4, 6, 2, -2], [-8, 2, 8, 4], [-12, -2, 4, 14]]

if TEST3:
	print (Project_4.compute_global_alignment('', '', {'A': {'A': 6, 'C': 2, '-': -4, 'T': 2, 'G': 2},
		'C': {'A': 2, 'C': 6, '-': -4, 'T': 2, 'G': 2}, '-': {'A': -4, 'C': -4, '-': -4, 'T': -4, 'G': -4},
		'T': {'A': 2, 'C': 2, '-': -4, 'T': 6, 'G': 2}, 'G': {'A': 2, 'C': 2, '-': -4, 'T': 2, 'G': 6}}, [[0]]))
	#expected tuple of length 3
	print (Project_4.compute_global_alignment('A', 'A', {'A': {'A': 6, 'C': 2, '-': -4, 'T': 2, 'G': 2},
		'C': {'A': 2, 'C': 6, '-': -4, 'T': 2, 'G': 2}, '-': {'A': -4, 'C': -4, '-': -4, 'T': -4, 'G': -4},
		'T': {'A': 2, 'C': 2, '-': -4, 'T': 6, 'G': 2}, 'G': {'A': 2, 'C': 2, '-': -4, 'T': 2, 'G': 6}}, [[0, -4], [-4, 6]]))
	#expected 6, 'A', 'A'
	print (Project_4.compute_global_alignment('ACTACT', 'AGCTA', {'A': {'A': 2, 'C': 1, '-': 0, 'T': 1, 'G': 1},
		'C': {'A': 1, 'C': 2, '-': 0, 'T': 1, 'G': 1}, '-': {'A': 0, 'C': 0, '-': 0, 'T': 0, 'G': 0},
		'T': {'A': 1, 'C': 1, '-': 0, 'T': 2, 'G': 1}, 'G': {'A': 1, 'C': 1, '-': 0, 'T': 1, 'G': 2}},
		[[0, 0, 0, 0, 0, 0], [0, 2, 2, 2, 2, 2], [0, 2, 3, 4, 4, 4], [0, 2, 3, 4, 6, 6], [0, 2, 3, 4, 6, 8],
		[0, 2, 3, 5, 6, 8], [0, 2, 3, 5, 7, 8]]))
	#expected 8, 'ACTACT', 'AGCTA',
	print (Project_4.compute_global_alignment('abddcdeffgh', 'aabcddefghij',
		{'-': {'-': -1, 'a': -1, 'c': -1, 'b': -1, 'e': -1, 'd': -1, 'g': -1, 'f': -1, 'i': -1, 'h': -1, 'k': -1, 'j': -1,
		'm': -1, 'l': -1, 'o': -1, 'n': -1, 'q': -1, 'p': -1, 's': -1, 'r': -1, 'u': -1, 't': -1, 'w': -1, 'v': -1, 'y': -1, 'x': -1,
Question 2
"""
# Elements 1 and 2 of `result` are used as the human and fruit fly local
# alignments (presumably produced by the preceding question - confirm).
ali_human = result[1]
ali_fly = result[2]
seq_con = provided.read_protein(provided.CONSENSUS_PAX_URL)

# Remove the dashes before re-aligning against the consensus sequence.
ali_human = ali_human.replace('-', '')
ali_fly = ali_fly.replace('-', '')

global_alignment_mx_human = Project_4.compute_alignment_matrix(
    ali_human, seq_con, scoring_matrix, True)
global_alignment_mx_fly = Project_4.compute_alignment_matrix(
    ali_fly, seq_con, scoring_matrix, True)

result2_human = Project_4.compute_global_alignment(ali_human, seq_con,
                                                   scoring_matrix,
                                                   global_alignment_mx_human)
result2_fly = Project_4.compute_global_alignment(ali_fly, seq_con,
                                                 scoring_matrix,
                                                 global_alignment_mx_fly)

# Single-argument print(...) calls produce the same output on Python 2 and
# Python 3; print('') replaces the bare Python 2 `print` blank line.
print('Score: ' + str(result2_human[0]))
print('Local Human: ' + result2_human[1])
print('Consensus: ' + result2_human[2])
print('')
print('Score: ' + str(result2_fly[0]))
print('Local Fly: ' + result2_fly[1])
print('Consensus: ' + result2_fly[2])

# Lengths of the globally aligned human and fruit fly strings.
len_human = len(result2_human[1])
len_fly = len(result2_fly[1])
Exemplo n.º 8
0
	print Project_4.compute_alignment_matrix('', '', {'A': {'A': 6, 'C': 2, '-': -4, 'T': 2, 'G': 2}, 
		'C': {'A': 2, 'C': 6, '-': -4, 'T': 2, 'G': 2}, '-': {'A': -4, 'C': -4, '-': -4, 'T': -4, 'G': -4}, 
		'T': {'A': 2, 'C': 2, '-': -4, 'T': 6, 'G': 2}, 'G': {'A': 2, 'C': 2, '-': -4, 'T': 2, 'G': 6}}, True)
	#expected [[0]] but received []
	print Project_4.compute_alignment_matrix('A', 'A', {'A': {'A': 6, 'C': 2, '-': -4, 'T': 2, 'G': 2}, 
		'C': {'A': 2, 'C': 6, '-': -4, 'T': 2, 'G': 2}, '-': {'A': -4, 'C': -4, '-': -4, 'T': -4, 'G': -4}, 
		'T': {'A': 2, 'C': 2, '-': -4, 'T': 6, 'G': 2}, 'G': {'A': 2, 'C': 2, '-': -4, 'T': 2, 'G': 6}}, True)
	#expected [[0, -4], [-4, 6]]
	print Project_4.compute_alignment_matrix('ATG', 'ACG', {'A': {'A': 6, 'C': 2, '-': -4, 'T': 2, 'G': 2}, 
		'C': {'A': 2, 'C': 6, '-': -4, 'T': 2, 'G': 2}, '-': {'A': -4, 'C': -4, '-': -4, 'T': -4, 'G': -4}, 
		'T': {'A': 2, 'C': 2, '-': -4, 'T': 6, 'G': 2}, 'G': {'A': 2, 'C': 2, '-': -4, 'T': 2, 'G': 6}}, True)
	#expected [[0, -4, -8, -12], [-4, 6, 2, -2], [-8, 2, 8, 4], [-12, -2, 4, 14]]

if TEST3:
	print Project_4.compute_global_alignment('', '', {'A': {'A': 6, 'C': 2, '-': -4, 'T': 2, 'G': 2}, 
		'C': {'A': 2, 'C': 6, '-': -4, 'T': 2, 'G': 2}, '-': {'A': -4, 'C': -4, '-': -4, 'T': -4, 'G': -4}, 
		'T': {'A': 2, 'C': 2, '-': -4, 'T': 6, 'G': 2}, 'G': {'A': 2, 'C': 2, '-': -4, 'T': 2, 'G': 6}}, [[0]])
	#expected tuple of length 3 
	print Project_4.compute_global_alignment('A', 'A', {'A': {'A': 6, 'C': 2, '-': -4, 'T': 2, 'G': 2}, 
		'C': {'A': 2, 'C': 6, '-': -4, 'T': 2, 'G': 2}, '-': {'A': -4, 'C': -4, '-': -4, 'T': -4, 'G': -4}, 
		'T': {'A': 2, 'C': 2, '-': -4, 'T': 6, 'G': 2}, 'G': {'A': 2, 'C': 2, '-': -4, 'T': 2, 'G': 6}}, [[0, -4], [-4, 6]])
	#expected 6, 'A', 'A'
	print Project_4.compute_global_alignment('ACTACT', 'AGCTA', {'A': {'A': 2, 'C': 1, '-': 0, 'T': 1, 'G': 1}, 
		'C': {'A': 1, 'C': 2, '-': 0, 'T': 1, 'G': 1}, '-': {'A': 0, 'C': 0, '-': 0, 'T': 0, 'G': 0}, 
		'T': {'A': 1, 'C': 1, '-': 0, 'T': 2, 'G': 1}, 'G': {'A': 1, 'C': 1, '-': 0, 'T': 1, 'G': 2}}, 
		[[0, 0, 0, 0, 0, 0], [0, 2, 2, 2, 2, 2], [0, 2, 3, 4, 4, 4], [0, 2, 3, 4, 6, 6], [0, 2, 3, 4, 6, 8], 
		[0, 2, 3, 5, 6, 8], [0, 2, 3, 5, 7, 8]])
	#expected 8, 'ACTACT', 'AGCTA',
	print Project_4.compute_global_alignment('abddcdeffgh', 'aabcddefghij', 
		{'-': {'-': -1, 'a': -1, 'c': -1, 'b': -1, 'e': -1, 'd': -1, 'g': -1, 'f': -1, 'i': -1, 'h': -1, 'k': -1, 'j': -1, 
		'm': -1, 'l': -1, 'o': -1, 'n': -1, 'q': -1, 'p': -1, 's': -1, 'r': -1, 'u': -1, 't': -1, 'w': -1, 'v': -1, 'y': -1, 'x': -1, 

"""
Question 2
"""
# Elements 1 and 2 of `result` are used as the human and fruit fly local
# alignments (presumably produced by the preceding question - confirm).
ali_human = result[1]
ali_fly = result[2]
seq_con = provided.read_protein(provided.CONSENSUS_PAX_URL)

# Remove the dashes before re-aligning against the consensus sequence.
ali_human = ali_human.replace('-', '')
ali_fly = ali_fly.replace('-', '')

global_alignment_mx_human = Project_4.compute_alignment_matrix(ali_human, seq_con, scoring_matrix, True)
global_alignment_mx_fly = Project_4.compute_alignment_matrix(ali_fly, seq_con, scoring_matrix, True)

result2_human = Project_4.compute_global_alignment(ali_human, seq_con, scoring_matrix, global_alignment_mx_human)
result2_fly = Project_4.compute_global_alignment(ali_fly, seq_con, scoring_matrix, global_alignment_mx_fly)


# Single-argument print(...) calls produce the same output on Python 2 and
# Python 3; print('') replaces the bare Python 2 `print` blank line.
print('Score: ' + str(result2_human[0]))
print('Local Human: ' + result2_human[1])
print('Consensus: ' + result2_human[2])
print('')
print('Score: ' + str(result2_fly[0]))
print('Local Fly: ' + result2_fly[1])
print('Consensus: ' + result2_fly[2])


# Lengths of the globally aligned human and fruit fly strings.
len_human = len(result2_human[1])
len_fly = len(result2_fly[1])