def test_run(self):
    # Smoke-run Feng-Doolittle on the feng1 FASTA file, once with WPGMA and
    # once with UPGMA clustering (PAM250, gap penalty 8), printing the
    # resulting sequences and score of each run.

    # perform pairwise sequence alignments
    records = utils.parse_fasta_files(["../data/feng_test/feng1.fa"])

    # NOTE(review): this list is built but never compared against
    # res.sequences, so the test currently asserts nothing. Confirm the
    # element type of res.sequences before turning it into an assertion.
    expected_order = [
        "ILDMDVVEGSAARFDCKVEGYPDPEVMWFKDDNPVKESRHFQIDYDEEGN",
        "ISDTEADIGSNLRWGCXAAAGKPRPMVRWLRNGEPLXASQNXRVEVXXLAX",
        "RRLIPAARGGEISILCQPRAAXPKATILWSKXGTEILGNXSTRVTVXTXSD",
        "RDPVKTHEGWGVMLPCNPPAHYPGLSYRWLLNEFPNFIPTDGRHFVSQXTT"
    ]

    def run_and_report(banner, method):
        # One clustering configuration: announce it, align, dump the result.
        print(banner)
        aligner = FengDoolittle(gap_penalty=8,
                                substitution_matrix=MatrixInfo.pam250,
                                clustering_method=method)
        outcome = aligner.run(records)
        print(outcome.sequences)
        print(outcome.score)

    # case 1
    run_and_report('####### WPGMA, PAM250, gap=8', Clustering.WPGMA)
    # case 2
    run_and_report('####### UPGMA, PAM250, gap=8', Clustering.UPGMA)
def test_run_blossum(self):
    # Compare every pairwise Gotoh score (blosum62, gap_penalty=11,
    # gap_extend=1, similarity scoring, complete traceback) against the
    # expected value for that sequence pair.
    #
    # NOTE(review): a second method named test_run_blossum (the
    # NeedlemanWunsch variant) appears in this file. If both live in the
    # same TestCase class, the later definition replaces this one and this
    # test never runs -- confirm and rename if so.
    seq_ildm = 'ILDMDVVEGSAARFDCKVEGYPDPEVMWFKDDNPVKESRHFQIDYDEEGN'
    seq_rdpv = 'RDPVKTHEGWGVMLPCNPPAHYPGLSYRWLLNEFPNFIPTDGRHFVSQTT'
    seq_isdt = 'ISDTEADIGSNLRWGCAAAGKPRPMVRWLRNGEPLASQNRVEVLA'
    seq_rrli = 'RRLIPAARGGEISILCQPRAAPKATILWSKGTEILGNSTRVTVTSD'

    # Keyed by (seq1, seq2) exactly as the aligner reports each pair.
    expected_scores = {
        (seq_ildm, seq_rdpv): 0,
        (seq_ildm, seq_isdt): 41,
        (seq_ildm, seq_rrli): 5,
        (seq_rdpv, seq_rrli): -4,
        (seq_isdt, seq_rrli): 18,
        (seq_rdpv, seq_isdt): -5,
    }

    fasta_path = '../data/guideline_tests/needlemanwunsch.fa'
    records = parse_fasta_files([fasta_path])

    aligner = Gotoh(substitution_matrix=MatrixInfo.blosum62,
                    gap_penalty=11,
                    gap_extend=1,
                    similarity=True,
                    verbose=False,
                    complete_traceback=True)

    for outcome in aligner.pairwise_alignments(records):
        pair = (str(outcome.seq1), str(outcome.seq2))
        wanted = expected_scores[pair]
        self.assertEqual(outcome.score, wanted)
        print(len(outcome.alignments))
def test_run_blossum(self):
    # Compare every pairwise Needleman-Wunsch score (blosum62,
    # gap_penalty=6, similarity scoring) against the expected value for
    # that sequence pair.
    #
    # NOTE(review): another method named test_run_blossum (the Gotoh
    # variant) appears earlier in this file. If both live in the same
    # TestCase class, this definition replaces the earlier one -- confirm
    # and rename if so.
    seq_ildm = 'ILDMDVVEGSAARFDCKVEGYPDPEVMWFKDDNPVKESRHFQIDYDEEGN'
    seq_rdpv = 'RDPVKTHEGWGVMLPCNPPAHYPGLSYRWLLNEFPNFIPTDGRHFVSQTT'
    seq_isdt = 'ISDTEADIGSNLRWGCAAAGKPRPMVRWLRNGEPLASQNRVEVLA'
    seq_rrli = 'RRLIPAARGGEISILCQPRAAPKATILWSKGTEILGNSTRVTVTSD'

    # Keyed by (seq1, seq2) exactly as the aligner reports each pair.
    expected_scores = {
        (seq_ildm, seq_rdpv): 4,
        (seq_ildm, seq_isdt): 37,
        (seq_ildm, seq_rrli): -4,
        (seq_rdpv, seq_isdt): 3,
        (seq_rdpv, seq_rrli): 9,
        (seq_isdt, seq_rrli): 24,
    }

    fasta_path = '../data/guideline_tests/needlemanwunsch.fa'
    records = parse_fasta_files([fasta_path])

    # init the needleman
    scoring = ScoringSettings(substitution_matrix=MatrixInfo.blosum62,
                              gap_penalty=6,
                              similarity=True)
    aligner = NeedlemanWunsch(scoring, complete_traceback=False, verbose=False)

    for outcome in aligner.pairwise_alignments(records):
        # Here the raw sequence is read from the .seq attribute of each record.
        pair = (str(outcome.seq1.seq), str(outcome.seq2.seq))
        wanted = expected_scores[pair]
        self.assertEqual(outcome.score, wanted)
def test_calculate_guide_tree(self):
    # Build a guide tree from the pairwise alignments of xpgma1.fa and check
    # the string form of both the tree and its node table.
    expected_tree = '((A:2.00,B:2.00):0.00,C:2.00)'
    expected_node_table = "{'A': A:2.00, 'B': B:2.00, 'C': C:2.00, 'AB': (A:2.00,B:2.00):0.00, 'ABC': ABC:NONE}"

    aligner = NeedlemanWunsch()
    records = utils.parse_fasta_files(["../data/xpgma/xpgma1.fa"])
    pairwise = aligner.pairwise_alignments(records)

    clusterer = Xpgma()
    # The distance matrix must be populated before the tree is calculated.
    clusterer.create_distance_matrix(pairwise)
    tree = clusterer.calculate_guide_tree()

    self.assertEqual(str(tree), expected_tree)
    self.assertEqual(str(tree.nodes), expected_node_table)
def test_convert_to_evolutionary_distances(self):
    # Convert the score of the first pairwise alignment of conversion.fa
    # with FengDoolittle.convert_to_evolutionary_distances and check the
    # resulting value.

    # perform pairwise sequence alignments
    aligner = NeedlemanWunsch()
    records = utils.parse_fasta_files(["../data/feng_test/conversion.fa"])
    pairwise = aligner.pairwise_alignments(records)

    converter = FengDoolittle()

    # Convert the scores to approximate pairwise evolutionary distances.
    target = pairwise[0]
    # Printed before the score is overwritten, so this shows the raw alignment.
    print(f'Alignment: {target} ')
    target.score = converter.convert_to_evolutionary_distances(target)
    print(f'Score: {target.score} ')

    self.assertAlmostEqual(first=2.70805020110221, second=target.score)
def test_create_distance_matrix(self):
    # Check that Xpgma.create_distance_matrix fills xpgma.distances with a
    # distance of 4.0 between every pair of distinct labels A, B and C.
    labels = ('A', 'B', 'C')
    # Nested mapping label -> {other label -> 4.0}, with no self-distances.
    expected_distances = {
        row: {col: 4.0 for col in labels if col != row}
        for row in labels
    }

    aligner = NeedlemanWunsch()
    records = utils.parse_fasta_files(["../data/xpgma/xpgma1.fa"])
    pairwise = aligner.pairwise_alignments(records)

    clusterer = Xpgma()
    clusterer.create_distance_matrix(pairwise)

    self.assertDictEqual(clusterer.distances, expected_distances)