def test_run(self):
     """Run Feng-Doolittle on ``feng1.fa`` with WPGMA, then with UPGMA.

     Both cases use PAM250 and a gap penalty of 8. For each case the
     banner, the resulting sequences and the resulting score are printed;
     nothing is asserted.
     """
     fasta_records = utils.parse_fasta_files(["../data/feng_test/feng1.fa"])

     # NOTE(review): this expectation is written down (identically for both
     # cases) but never compared against the result, so the test cannot
     # fail on a wrong alignment -- confirm the intended assertion.
     expected_order = [
         "ILDMDVVEGSAARFDCKVEGYPDPEVMWFKDDNPVKESRHFQIDYDEEGN",
         "ISDTEADIGSNLRWGCXAAAGKPRPMVRWLRNGEPLXASQNXRVEVXXLAX",
         "RRLIPAARGGEISILCQPRAAXPKATILWSKXGTEILGNXSTRVTVXTXSD",
         "RDPVKTHEGWGVMLPCNPPAHYPGLSYRWLLNEFPNFIPTDGRHFVSQXTT"
     ]

     def report(banner, clustering):
         # One case: announce it, align, then dump sequences and score.
         print(banner)
         aligner = FengDoolittle(gap_penalty=8,
                                 substitution_matrix=MatrixInfo.pam250,
                                 clustering_method=clustering)
         outcome = aligner.run(fasta_records)
         print(outcome.sequences)
         print(outcome.score)

     # case 1
     report('####### WPGMA, PAM250, gap=8', Clustering.WPGMA)
     # case 2
     report('####### UPGMA, PAM250, gap=8', Clustering.UPGMA)
    def test_run_blossum(self):
        """Check Gotoh pairwise scores with BLOSUM62, gap open 11, gap extend 1.

        Every alignment returned for the sequences in ``needlemanwunsch.fa``
        is looked up by its ``(seq1, seq2)`` string pair and its score is
        compared with the expected value. The number of alignments found
        for each pair is printed.
        """
        # NOTE(review): another method named ``test_run_blossum`` appears
        # later in the visible source; if both live in the same class the
        # later definition replaces this one -- confirm.
        seq_il = 'ILDMDVVEGSAARFDCKVEGYPDPEVMWFKDDNPVKESRHFQIDYDEEGN'
        seq_rd = 'RDPVKTHEGWGVMLPCNPPAHYPGLSYRWLLNEFPNFIPTDGRHFVSQTT'
        seq_is = 'ISDTEADIGSNLRWGCAAAGKPRPMVRWLRNGEPLASQNRVEVLA'
        seq_rr = 'RRLIPAARGGEISILCQPRAAPKATILWSKGTEILGNSTRVTVTSD'

        # Keys are ordered pairs: a result reported as (b, a) is a KeyError.
        expected_scores = {
            (seq_il, seq_rd): 0,
            (seq_il, seq_is): 41,
            (seq_il, seq_rr): 5,
            (seq_rd, seq_rr): -4,
            (seq_is, seq_rr): 18,
            (seq_rd, seq_is): -5,
        }

        fasta_path = '../data/guideline_tests/needlemanwunsch.fa'
        records = parse_fasta_files([fasta_path])
        aligner = Gotoh(substitution_matrix=MatrixInfo.blosum62,
                        gap_penalty=11,
                        gap_extend=1,
                        similarity=True,
                        verbose=False,
                        complete_traceback=True)

        for alignment in aligner.pairwise_alignments(records):
            pair_key = (str(alignment.seq1), str(alignment.seq2))
            wanted = expected_scores[pair_key]
            self.assertEqual(alignment.score, wanted)
            print(len(alignment.alignments))
    def test_run_blossum(self):
        """Check Needleman-Wunsch pairwise scores with BLOSUM62 and gap penalty 6.

        Every alignment returned for the sequences in ``needlemanwunsch.fa``
        is looked up by its ``(seq1.seq, seq2.seq)`` string pair and its
        score is compared with the expected value.
        """
        # NOTE(review): another method named ``test_run_blossum`` appears
        # earlier in the visible source; if both live in the same class this
        # definition replaces the earlier one -- confirm.
        seq_il = 'ILDMDVVEGSAARFDCKVEGYPDPEVMWFKDDNPVKESRHFQIDYDEEGN'
        seq_rd = 'RDPVKTHEGWGVMLPCNPPAHYPGLSYRWLLNEFPNFIPTDGRHFVSQTT'
        seq_is = 'ISDTEADIGSNLRWGCAAAGKPRPMVRWLRNGEPLASQNRVEVLA'
        seq_rr = 'RRLIPAARGGEISILCQPRAAPKATILWSKGTEILGNSTRVTVTSD'

        # Keys are ordered pairs: a result reported as (b, a) is a KeyError.
        expected_scores = {
            (seq_il, seq_rd): 4,
            (seq_il, seq_is): 37,
            (seq_il, seq_rr): -4,
            (seq_rd, seq_is): 3,
            (seq_rd, seq_rr): 9,
            (seq_is, seq_rr): 24,
        }

        fasta_path = '../data/guideline_tests/needlemanwunsch.fa'
        records = parse_fasta_files([fasta_path])

        # Set up the aligner: BLOSUM62 similarity scoring, gap penalty 6.
        scoring = ScoringSettings(substitution_matrix=MatrixInfo.blosum62,
                                  gap_penalty=6,
                                  similarity=True)
        aligner = NeedlemanWunsch(scoring,
                                  complete_traceback=False,
                                  verbose=False)

        for alignment in aligner.pairwise_alignments(records):
            pair_key = (str(alignment.seq1.seq), str(alignment.seq2.seq))
            wanted = expected_scores[pair_key]
            self.assertEqual(alignment.score, wanted)
 def test_calculate_guide_tree(self):
     """Check the guide tree Xpgma builds for the ``xpgma1.fa`` sequences.

     The pairwise alignments from a default NeedlemanWunsch feed the
     distance matrix; both the string form of the tree and the string
     form of its ``nodes`` attribute are compared with fixed text.
     """
     pairwise = NeedlemanWunsch().pairwise_alignments(
         utils.parse_fasta_files(["../data/xpgma/xpgma1.fa"]))

     clusterer = Xpgma()
     clusterer.create_distance_matrix(pairwise)
     tree = clusterer.calculate_guide_tree()

     tree_text = '((A:2.00,B:2.00):0.00,C:2.00)'
     nodes_text = "{'A': A:2.00, 'B': B:2.00, 'C': C:2.00, 'AB': (A:2.00,B:2.00):0.00, 'ABC': ABC:NONE}"
     self.assertEqual(str(tree), tree_text)
     self.assertEqual(str(tree.nodes), nodes_text)
 def test_convert_to_evolutionary_distances(self):
     """Check the distance FengDoolittle derives from one pairwise alignment.

     The first alignment of the ``conversion.fa`` sequences is converted,
     the result is written back to its ``score`` attribute, and that
     score is compared (almost-equal) with a fixed value. The alignment
     and the new score are printed along the way.
     """
     # Pairwise alignments come from a default NeedlemanWunsch.
     aligner = NeedlemanWunsch()
     records = utils.parse_fasta_files(
         ["../data/feng_test/conversion.fa"])
     pairwise = aligner.pairwise_alignments(records)
     converter = FengDoolittle()

     # Only the first alignment is converted and checked.
     first_alignment = pairwise[0]
     print(f'Alignment: {first_alignment} ')
     first_alignment.score = converter.convert_to_evolutionary_distances(
         first_alignment)
     print(f'Score: {first_alignment.score} ')

     self.assertAlmostEqual(2.70805020110221, first_alignment.score)
 def test_create_distance_matrix(self):
     """Check the distance matrix Xpgma builds for the ``xpgma1.fa`` sequences.

     The expected ``distances`` mapping has an entry of 4.0 between every
     pair of distinct labels among A, B and C, and no self-distances.
     """
     pairwise = NeedlemanWunsch().pairwise_alignments(
         utils.parse_fasta_files(["../data/xpgma/xpgma1.fa"]))

     clusterer = Xpgma()
     clusterer.create_distance_matrix(pairwise)

     labels = ('A', 'B', 'C')
     # Nested mapping: label -> {other label -> 4.0}, diagonal omitted.
     expected = {
         row: {col: 4.0 for col in labels if col != row}
         for row in labels
     }
     self.assertDictEqual(clusterer.distances, expected)