def test_compute_local_alignment():
    """
    Run three checks of proj4_solution.compute_local_alignment.

    Every result is printed and then compared, through a
    poc_simpletest.TestSuite, with the tuple the call is expected to
    return; the suite's report is emitted at the end.
    """
    checker = poc_simpletest.TestSuite()
    alphabet = ["A", "C", "T", "G"]

    # Case 1: both matrices come from the solution module itself
    # (build parameters 10, 4, -6; alignment-matrix flag False).
    first, second = "AA", "TAAT"
    scores = proj4_solution.build_scoring_matrix(set(alphabet), 10, 4, -6)
    table = proj4_solution.compute_alignment_matrix(
        first, second, scores, False)
    outcome = proj4_solution.compute_local_alignment(
        first, second, scores, table)
    # print(...) with one argument prints the same text as the Python 2
    # print statement.
    print(outcome)
    checker.run_test(outcome, (20, "AA", "AA"), "Error Found")

    # Case 2: the first sequence already contains dash characters
    # (build parameters 10, 2, -4).
    first, second = "---AC-C--", "TTTACACGG"
    scores = proj4_solution.build_scoring_matrix(set(alphabet), 10, 2, -4)
    table = proj4_solution.compute_alignment_matrix(
        first, second, scores, False)
    outcome = proj4_solution.compute_local_alignment(
        first, second, scores, table)
    print(outcome)
    checker.run_test(outcome, (26, "AC-C", "ACAC"), "Error Found")

    # Case 3: hand-written scoring and alignment matrices for two
    # single-letter sequences.
    handmade_scores = {
        "A": {"A": 6, "C": 2, "-": -4, "T": 2, "G": 2},
        "C": {"A": 2, "C": 6, "-": -4, "T": 2, "G": 2},
        "-": {"A": -4, "C": -4, "-": -4, "T": -4, "G": -4},
        "G": {"A": 2, "C": 2, "-": -4, "T": 2, "G": 6},
    }
    handmade_table = [[0, 0], [0, 6]]
    outcome = proj4_solution.compute_local_alignment(
        "A", "A", handmade_scores, handmade_table)
    print(outcome)
    checker.run_test(outcome, (6, "A", "A"), "Error Found")

    checker.report_results()
def test_compute_local_alignment():
    """
    Run three checks of proj4_solution.compute_local_alignment.

    Every result is printed and then compared, through a
    poc_simpletest.TestSuite, with the tuple the call is expected to
    return; the suite's report is emitted at the end.
    """
    checker = poc_simpletest.TestSuite()
    alphabet = ["A", "C", "T", "G"]

    # Case 1: both matrices come from the solution module itself
    # (build parameters 10, 4, -6; alignment-matrix flag False).
    first, second = "AA", "TAAT"
    scores = proj4_solution.build_scoring_matrix(set(alphabet), 10, 4, -6)
    table = proj4_solution.compute_alignment_matrix(
        first, second, scores, False)
    outcome = proj4_solution.compute_local_alignment(
        first, second, scores, table)
    # print(...) with one argument prints the same text as the Python 2
    # print statement.
    print(outcome)
    checker.run_test(outcome, (20, "AA", "AA"), "Error Found")

    # Case 2: the first sequence already contains dash characters
    # (build parameters 10, 2, -4).
    first, second = "---AC-C--", "TTTACACGG"
    scores = proj4_solution.build_scoring_matrix(set(alphabet), 10, 2, -4)
    table = proj4_solution.compute_alignment_matrix(
        first, second, scores, False)
    outcome = proj4_solution.compute_local_alignment(
        first, second, scores, table)
    print(outcome)
    checker.run_test(outcome, (26, "AC-C", "ACAC"), "Error Found")

    # Case 3: hand-written scoring and alignment matrices for two
    # single-letter sequences.
    handmade_scores = {
        "A": {"A": 6, "C": 2, "-": -4, "T": 2, "G": 2},
        "C": {"A": 2, "C": 6, "-": -4, "T": 2, "G": 2},
        "-": {"A": -4, "C": -4, "-": -4, "T": -4, "G": -4},
        "G": {"A": 2, "C": 2, "-": -4, "T": 2, "G": 6},
    }
    handmade_table = [[0, 0], [0, 6]]
    outcome = proj4_solution.compute_local_alignment(
        "A", "A", handmade_scores, handmade_table)
    print(outcome)
    checker.run_test(outcome, (6, "A", "A"), "Error Found")

    checker.report_results()
def test_compute_global_alignment():
    """
    Run three checks of proj4_solution.compute_global_alignment.

    Every result is printed and then compared, through a
    poc_simpletest.TestSuite, with the tuple the call is expected to
    return; the suite's report is emitted at the end.
    """

    def handmade_scores():
        """Return a fresh copy of the hand-written scoring matrix."""
        # A new dict per call keeps the two hand-written cases independent,
        # exactly as the two separate literals did.
        return {
            "A": {"A": 2, "C": 1, "-": 0, "T": 1, "G": 1},
            "C": {"A": 1, "C": 2, "-": 0, "T": 1, "G": 1},
            "-": {"A": 0, "C": 0, "-": 0, "T": 0, "G": 0},
            "T": {"A": 1, "C": 1, "-": 0, "T": 2, "G": 1},
            "G": {"A": 1, "C": 1, "-": 0, "T": 1, "G": 2},
        }

    checker = poc_simpletest.TestSuite()

    # Case 1: both matrices come from the solution module itself
    # (build parameters 10, 4, -6; alignment-matrix flag True).
    first, second = "AA", "TAAT"
    scores = proj4_solution.build_scoring_matrix(
        set(["A", "C", "T", "G"]), 10, 4, -6)
    table = proj4_solution.compute_alignment_matrix(
        first, second, scores, True)
    outcome = proj4_solution.compute_global_alignment(
        first, second, scores, table)
    # print(...) with one argument prints the same text as the Python 2
    # print statement.
    print(outcome)
    checker.run_test(outcome, (8, "-AA-", "TAAT"), "Error Found")

    # Case 2: hand-written scoring matrix and a 7 x 6 alignment matrix.
    short_table = [
        [0, 0, 0, 0, 0, 0],
        [0, 2, 2, 2, 2, 2],
        [0, 2, 3, 4, 4, 4],
        [0, 2, 3, 4, 6, 6],
        [0, 2, 3, 4, 6, 8],
        [0, 2, 3, 5, 6, 8],
        [0, 2, 3, 5, 7, 8],
    ]
    outcome = proj4_solution.compute_global_alignment(
        "ACTACT", "AGCTA", handmade_scores(), short_table)
    print(outcome)
    checker.run_test(outcome, (8, "A-CTACT", "AGCTA--"), "Error Found")

    # Case 3: same scoring values, longer second sequence, 7 x 14 matrix.
    long_table = [
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
        [0, 1, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
        [0, 1, 2, 3, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6],
        [0, 1, 2, 4, 4, 6, 7, 7, 7, 7, 7, 7, 7, 7],
        [0, 1, 2, 4, 6, 6, 7, 9, 9, 9, 9, 9, 9, 9],
        [0, 1, 2, 4, 6, 8, 8, 9, 11, 11, 11, 11, 11, 11],
    ]
    outcome = proj4_solution.compute_global_alignment(
        "ACTACT", "GGACTGCTTCTGG", handmade_scores(), long_table)
    print(outcome)
    checker.run_test(
        outcome, (11, "--A---CTACT--", "GGACTGCTTCTGG"), "Error Found")

    checker.report_results()
def compute_edit_distance(word1, word2, scoring_matrix):
    """
    Question 7

    Derive an edit distance from the alignment matrix that
    student.compute_alignment_matrix builds for word1 and word2 with its
    final flag set to True (the value used for global alignment elsewhere
    in this file).

    :param word1: first sequence
    :param word2: second sequence
    :param scoring_matrix: scoring matrix handed to the alignment routine
    :return: len(word1) + len(word2) minus the matrix entry at
        [len(word1)][len(word2)]
    """
    table = student.compute_alignment_matrix(
        word1, word2, scoring_matrix, True)
    rows, cols = len(word1), len(word2)
    # The entry indexed by both full lengths is the score of aligning the
    # complete words.
    return rows + cols - table[rows][cols]
def compute_edit_distance(word1, word2, scoring_matrix):
    """
    Question 7

    Derive an edit distance from the alignment matrix that
    student.compute_alignment_matrix builds for word1 and word2 with its
    final flag set to True (the value used for global alignment elsewhere
    in this file).

    :param word1: first sequence
    :param word2: second sequence
    :param scoring_matrix: scoring matrix handed to the alignment routine
    :return: len(word1) + len(word2) minus the matrix entry at
        [len(word1)][len(word2)]
    """
    table = student.compute_alignment_matrix(
        word1, word2, scoring_matrix, True)
    rows, cols = len(word1), len(word2)
    # The entry indexed by both full lengths is the score of aligning the
    # complete words.
    return rows + cols - table[rows][cols]
def local_alignment_eyeless_protein():
    """
    Question: 1

    Read the human and fruit fly eyeless sequences and the PAM50 scoring
    matrix through the provided module, then return whatever
    student.compute_local_alignment yields for them.
    """
    # Loads happen in the same order as before: human, fruit fly, PAM50.
    human = provided.read_protein(provided.HUMAN_EYELESS_URL)
    fly = provided.read_protein(provided.FRUITFLY_EYELESS_URL)
    pam50 = provided.read_scoring_matrix(provided.PAM50_URL)

    # False is the flag value this file pairs with local alignment.
    table = student.compute_alignment_matrix(human, fly, pam50, False)
    return student.compute_local_alignment(human, fly, pam50, table)
def test_compute_alignment_matrix():
    """
    Check one entry of the global alignment matrix for "AA" vs "TAAT".

    The scoring matrix is built with parameters 10, 4, -6 and the
    alignment matrix is requested with the final flag set to True, the
    value this file pairs with global alignment.  The matrix is printed
    and its [1][2] entry is compared with 4 through a
    poc_simpletest.TestSuite.
    """
    suite = poc_simpletest.TestSuite()
    scoring_matrix = proj4_solution.build_scoring_matrix(
        set(["A", "C", "T", "G"]), 10, 4, -6)
    # The flag was previously False although the result is treated as the
    # global matrix.  Assuming 10 is the match score and -6 the dash score,
    # entry [1][2] is -6 + 10 = 4 globally, whereas a local matrix (scores
    # clamped at zero) would hold 10 there.
    global_alignment_matrix = proj4_solution.compute_alignment_matrix(
        "AA", "TAAT", scoring_matrix, True)
    # print(...) with one argument prints the same text as the Python 2
    # print statement.
    print(global_alignment_matrix)
    suite.run_test(global_alignment_matrix[1][2], 4, "Error Found")
    suite.report_results()
def test_compute_alignment_matrix():
    """
    Check one entry of the global alignment matrix for "AA" vs "TAAT".

    The scoring matrix is built with parameters 10, 4, -6 and the
    alignment matrix is requested with the final flag set to True, the
    value this file pairs with global alignment.  The matrix is printed
    and its [1][2] entry is compared with 4 through a
    poc_simpletest.TestSuite.
    """
    suite = poc_simpletest.TestSuite()
    scoring_matrix = proj4_solution.build_scoring_matrix(
        set(["A", "C", "T", "G"]), 10, 4, -6)
    # The flag was previously False although the result is treated as the
    # global matrix.  Assuming 10 is the match score and -6 the dash score,
    # entry [1][2] is -6 + 10 = 4 globally, whereas a local matrix (scores
    # clamped at zero) would hold 10 there.
    global_alignment_matrix = proj4_solution.compute_alignment_matrix(
        "AA", "TAAT", scoring_matrix, True)
    # print(...) with one argument prints the same text as the Python 2
    # print statement.
    print(global_alignment_matrix)
    suite.run_test(global_alignment_matrix[1][2], 4, "Error Found")
    suite.report_results()
def local_alignment_eyeless_protein():
    """
    Question: 1

    Read the human and fruit fly eyeless sequences and the PAM50 scoring
    matrix through the provided module, then return whatever
    student.compute_local_alignment yields for them.
    """
    # Loads happen in the same order as before: human, fruit fly, PAM50.
    human = provided.read_protein(provided.HUMAN_EYELESS_URL)
    fly = provided.read_protein(provided.FRUITFLY_EYELESS_URL)
    pam50 = provided.read_scoring_matrix(provided.PAM50_URL)

    # False is the flag value this file pairs with local alignment.
    table = student.compute_alignment_matrix(human, fly, pam50, False)
    return student.compute_local_alignment(human, fly, pam50, table)
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    Question: 4: Compute null distribution

    For each trial, shuffle the characters of seq_y, build the alignment
    matrix of seq_x against the shuffled string (final flag False) and
    record the largest entry of that matrix.

    :param seq_x: sequence kept fixed in every trial
    :param seq_y: sequence whose characters are shuffled each trial
    :param scoring_matrix: scoring matrix handed to the alignment routine
    :param num_trials: number of shuffles to perform; zero or a negative
        value performs none
    :return: dict mapping each observed maximum score to the number of
        trials that produced it
    """
    scoring_distribution = {}
    for trial in range(num_trials):
        shuffled = list(seq_y)
        random.shuffle(shuffled)
        rand_y = "".join(shuffled)
        # Progress output: the zero-based trial index, one per line.
        print(trial)
        alignment_matrix = student.compute_alignment_matrix(
            seq_x, rand_y, scoring_matrix, False)
        max_score = max(max(row) for row in alignment_matrix)
        # dict.get replaces the deprecated has_key branch.
        scoring_distribution[max_score] = (
            scoring_distribution.get(max_score, 0) + 1)
    return scoring_distribution
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """
    Question: 4: Compute null distribution

    For each trial, shuffle the characters of seq_y, build the alignment
    matrix of seq_x against the shuffled string (final flag False) and
    record the largest entry of that matrix.

    :param seq_x: sequence kept fixed in every trial
    :param seq_y: sequence whose characters are shuffled each trial
    :param scoring_matrix: scoring matrix handed to the alignment routine
    :param num_trials: number of shuffles to perform; zero or a negative
        value performs none
    :return: dict mapping each observed maximum score to the number of
        trials that produced it
    """
    scoring_distribution = {}
    for trial in range(num_trials):
        shuffled = list(seq_y)
        random.shuffle(shuffled)
        rand_y = "".join(shuffled)
        # Progress output: the zero-based trial index, one per line.
        print(trial)
        alignment_matrix = student.compute_alignment_matrix(
            seq_x, rand_y, scoring_matrix, False)
        max_score = max(max(row) for row in alignment_matrix)
        # dict.get replaces the deprecated has_key branch.
        scoring_distribution[max_score] = (
            scoring_distribution.get(max_score, 0) + 1)
    return scoring_distribution
def global_alignment_consensus():
    """
    Question: 2

    Take elements 1 and 2 of the result of
    local_alignment_eyeless_protein(), strip their dashes, align each one
    against the consensus PAX sequence (alignment-matrix flag True) and
    measure how many aligned positions carry the same character.

    :return: list of two floats, each the percentage of positions at
        which the two strings of the computed alignment agree
    """
    percentages = []
    local_result = local_alignment_eyeless_protein()
    consensus = provided.read_protein(provided.CONSENSUS_PAX_URL)
    pam50 = provided.read_scoring_matrix(provided.PAM50_URL)

    for position in (1, 2):
        stripped = local_result[position].replace("-", "")
        table = student.compute_alignment_matrix(
            stripped, consensus, pam50, True)
        aligned = student.compute_global_alignment(
            stripped, consensus, pam50, table)
        first_row, second_row = aligned[1], aligned[2]
        # Count the columns where both aligned strings agree.
        matches = sum(
            1 for left, right in zip(first_row, second_row) if left == right)
        # float() before dividing keeps true division under Python 2.
        percentages.append(float(matches * 100) / len(first_row))
    return percentages
def global_alignment_consensus():
    """
    Question: 2

    Take elements 1 and 2 of the result of
    local_alignment_eyeless_protein(), strip their dashes, align each one
    against the consensus PAX sequence (alignment-matrix flag True) and
    measure how many aligned positions carry the same character.

    :return: list of two floats, each the percentage of positions at
        which the two strings of the computed alignment agree
    """
    percentages = []
    local_result = local_alignment_eyeless_protein()
    consensus = provided.read_protein(provided.CONSENSUS_PAX_URL)
    pam50 = provided.read_scoring_matrix(provided.PAM50_URL)

    for position in (1, 2):
        stripped = local_result[position].replace("-", "")
        table = student.compute_alignment_matrix(
            stripped, consensus, pam50, True)
        aligned = student.compute_global_alignment(
            stripped, consensus, pam50, table)
        first_row, second_row = aligned[1], aligned[2]
        # Count the columns where both aligned strings agree.
        matches = sum(
            1 for left, right in zip(first_row, second_row) if left == right)
        # float() before dividing keeps true division under Python 2.
        percentages.append(float(matches * 100) / len(first_row))
    return percentages
def test_compute_global_alignment():
    """
    Run three checks of proj4_solution.compute_global_alignment.

    Every result is printed and then compared, through a
    poc_simpletest.TestSuite, with the tuple the call is expected to
    return; the suite's report is emitted at the end.
    """

    def handmade_scores():
        """Return a fresh copy of the hand-written scoring matrix."""
        # A new dict per call keeps the two hand-written cases independent,
        # exactly as the two separate literals did.
        return {
            "A": {"A": 2, "C": 1, "-": 0, "T": 1, "G": 1},
            "C": {"A": 1, "C": 2, "-": 0, "T": 1, "G": 1},
            "-": {"A": 0, "C": 0, "-": 0, "T": 0, "G": 0},
            "T": {"A": 1, "C": 1, "-": 0, "T": 2, "G": 1},
            "G": {"A": 1, "C": 1, "-": 0, "T": 1, "G": 2},
        }

    checker = poc_simpletest.TestSuite()

    # Case 1: both matrices come from the solution module itself
    # (build parameters 10, 4, -6; alignment-matrix flag True).
    first, second = "AA", "TAAT"
    scores = proj4_solution.build_scoring_matrix(
        set(["A", "C", "T", "G"]), 10, 4, -6)
    table = proj4_solution.compute_alignment_matrix(
        first, second, scores, True)
    outcome = proj4_solution.compute_global_alignment(
        first, second, scores, table)
    # print(...) with one argument prints the same text as the Python 2
    # print statement.
    print(outcome)
    checker.run_test(outcome, (8, "-AA-", "TAAT"), "Error Found")

    # Case 2: hand-written scoring matrix and a 7 x 6 alignment matrix.
    short_table = [
        [0, 0, 0, 0, 0, 0],
        [0, 2, 2, 2, 2, 2],
        [0, 2, 3, 4, 4, 4],
        [0, 2, 3, 4, 6, 6],
        [0, 2, 3, 4, 6, 8],
        [0, 2, 3, 5, 6, 8],
        [0, 2, 3, 5, 7, 8],
    ]
    outcome = proj4_solution.compute_global_alignment(
        "ACTACT", "AGCTA", handmade_scores(), short_table)
    print(outcome)
    checker.run_test(outcome, (8, "A-CTACT", "AGCTA--"), "Error Found")

    # Case 3: same scoring values, longer second sequence, 7 x 14 matrix.
    long_table = [
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
        [0, 1, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
        [0, 1, 2, 3, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6],
        [0, 1, 2, 4, 4, 6, 7, 7, 7, 7, 7, 7, 7, 7],
        [0, 1, 2, 4, 6, 6, 7, 9, 9, 9, 9, 9, 9, 9],
        [0, 1, 2, 4, 6, 8, 8, 9, 11, 11, 11, 11, 11, 11],
    ]
    outcome = proj4_solution.compute_global_alignment(
        "ACTACT", "GGACTGCTTCTGG", handmade_scores(), long_table)
    print(outcome)
    checker.run_test(
        outcome, (11, "--A---CTACT--", "GGACTGCTTCTGG"), "Error Found")

    checker.report_results()