def test_run_blossum(self):
        """Compare pairwise Gotoh scores against a table of expected values.

        The sequences are read from a FASTA file and aligned pairwise with
        BLOSUM62, gap_penalty=11, gap_extend=1 and similarity scoring. Each
        result's score is looked up by its (seq1, seq2) string pair, and the
        number of alignments found is printed.
        """
        seq_a = 'ILDMDVVEGSAARFDCKVEGYPDPEVMWFKDDNPVKESRHFQIDYDEEGN'
        seq_b = 'RDPVKTHEGWGVMLPCNPPAHYPGLSYRWLLNEFPNFIPTDGRHFVSQTT'
        seq_c = 'ISDTEADIGSNLRWGCAAAGKPRPMVRWLRNGEPLASQNRVEVLA'
        seq_d = 'RRLIPAARGGEISILCQPRAAPKATILWSKGTEILGNSTRVTVTSD'

        # Keys are ordered pairs: the lookup below fails with KeyError if the
        # aligner reports a pair in the opposite order.
        expected_scores = {
            (seq_a, seq_b): 0,
            (seq_a, seq_c): 41,
            (seq_a, seq_d): 5,
            (seq_b, seq_d): -4,
            (seq_c, seq_d): 18,
            (seq_b, seq_c): -5,
        }

        fasta_path = '../data/guideline_tests/needlemanwunsch.fa'
        sequences = parse_fasta_files([fasta_path])
        aligner = Gotoh(substitution_matrix=MatrixInfo.blosum62,
                        gap_penalty=11,
                        gap_extend=1,
                        similarity=True,
                        verbose=False,
                        complete_traceback=True)

        for pair_result in aligner.pairwise_alignments(sequences):
            pair_key = (str(pair_result.seq1), str(pair_result.seq2))
            expected = expected_scores[pair_key]
            self.assertEqual(pair_result.score, expected)
            print(len(pair_result.alignments))
Exemplo n.º 2
0
def test_guideline_blosum():
    """Run the guideline test cases (dated 04.02.2019) with BLOSUM62.

    Note: the expected values below are not the ones printed in the
    guideline.
    """
    aligner = Gotoh()

    result, _info = aligner.run("data/xpgma_guideline.fasta",
                                "data/xpgma_guideline.fasta",
                                "data/blosum62.txt",
                                False,
                                11,
                                1,
                                True)

    # `result` is read as an upper-triangular n x n matrix: result[i][j]
    # holds the entry for sequences i and j. Within an entry, index 3 is
    # compared against the score and index 2 against the list of alignments.
    # Each row: (i, j, expected score, expected alignment count, first alignment)
    expectations = [
        (0, 1, 0, 1,
         ('ILDMDVVEGSAARFDCKVEG_YPDPEVMWFKDDNP___VKESRHFQIDYDEEGN',
          'RDPVKTHEGWGVMLPCNPPAHYPGLSYRWLLNEFPNFIPTDGRHFV____SQTT')),
        (0, 2, 41, 3,
         ('ILDMDVVEGSAARFDCKVEGYPDPEVMWFKDDNPVKESRHFQIDYDEEGN',
          'ISDTEADIGSNLRWGCAAAGKPRPMVRWLRNGEPLASQNRVEV_____LA')),
        (0, 3, 5, 1,
         ('ILDMDVVEGSAARFDCKVEGYPDPEVMWFKDDNPVKESRHFQIDYDEEGN',
          'RRLIPAARGGEISILCQPRAAPKATILWSKGTEILGNSTRVTVTSD____')),
        (1, 2, -5, 4,
         ('RDPVKTHEGWGVMLPCNPPAHYPGLSYRWLLNEFPNFIPTDGRHFVSQTT',
          'ISDTEADIGSNLRWGC_AAAGKPRPMVRWLRNG____EPLASQNRVEVLA')),
        (1, 3, -4, 2,
         ('RDPVKTHEGWGVMLPCNPPAHYPGLSYRWLLNEFPNFIPTDGRHFVSQTT',
          'RRLIPAARGGEISILCQPRA_APKATILW__SKGTEILGNSTRVTVT_SD')),
        (2, 3, 18, 1,
         ('ISDTEADIGSNLRWGCAAAGKPRPMVRWLRNGEPLASQNRVEVLA_',
          'RRLIPAARGGEISILCQPRAAPKATILWSKGTEILGNSTRVTVTSD')),
    ]

    for row, col, score, alignment_count, first_alignment in expectations:
        entry = result[row][col]
        assert entry[3] == score
        assert len(entry[2]) == alignment_count
        assert entry[2][0] == first_alignment
Exemplo n.º 3
0
def test_example_invalid_characters_fail():
    """Negative test: ``run`` must exit when file 2 has invalid characters.

    The second FASTA file contains non-amino-acid characters; the program is
    expected to terminate via ``SystemExit`` with exit code 12.
    """
    gt = Gotoh()
    seq_fasta_1 = os.path.join('data', 'sequences', 'seq1.fasta')
    seq_fasta_2 = os.path.join('data', 'sequences', 'Invalid_characters.fasta')

    with pytest.raises(SystemExit) as exc_info:
        gt.run(seq_fasta_1,
               seq_fasta_2,
               'pam250',
               -11,
               -1,
               False)

    # These checks must sit outside the `with` body: once `run` raises, the
    # rest of the body is skipped, so assertions placed there never execute.
    # The exit status lives on the exception instance (`.value.code`), not on
    # the ExceptionInfo wrapper returned by pytest.raises.
    assert exc_info.type is SystemExit
    assert exc_info.value.code == 12
Exemplo n.º 4
0
def test_example_invalid_format_fail():
    """Negative test: ``run`` must exit when the first file is not valid FASTA.

    The first line of the malformed file does not start with ``>``; the
    program is expected to terminate via ``SystemExit`` with exit code 1.
    """
    gt = Gotoh()
    seq_fasta_1 = os.path.join('data', 'sequences', 'seq1.fasta')
    seq_fasta_2 = os.path.join('data', 'sequences', 'Invalid_format.fasta')

    # The malformed file is deliberately passed as the first sequence file.
    with pytest.raises(SystemExit) as exc_info:
        gt.run(seq_fasta_2,
               seq_fasta_1,
               'pam250',
               -11,
               -1,
               False)

    # These checks must sit outside the `with` body: once `run` raises, the
    # rest of the body is skipped, so assertions placed there never execute.
    # The exit status lives on the exception instance (`.value.code`), not on
    # the ExceptionInfo wrapper returned by pytest.raises.
    assert exc_info.type is SystemExit
    assert exc_info.value.code == 1
def test_example():
    """Smoke test: call ``run`` and check every field of the returned 6-tuple."""
    aligner = Gotoh()
    id_seq1, seq1, id_seq2, seq2, score, alignments = aligner.run(
        "data/sequence1.fa",
        "data/sequence2.fa",
        "data/blosum62.txt",
        5,
        20,
        False,
    )
    expected_first_alignment = ("Fancy_SequenceA_",
                                "Fancys_equence_B")

    # Identifiers and raw sequences of both inputs.
    assert id_seq1 == "idA"
    assert id_seq2 == "idB"
    assert seq1 == "FancySequenceA"
    assert seq2 == "FancysequenceB"

    # Score and the first reported alignment.
    assert score == 1000
    assert alignments[0] == expected_first_alignment
Exemplo n.º 6
0
def test_example():
    """Smoke test: call ``run`` and inspect the first entry of the result."""
    aligner = Gotoh()
    result, _info = aligner.run(
        "data/sequence1.fasta",
        "data/sequence2.fasta",
        "data/test_scoring_similarity.txt",
        False,
        5,
        1,
        True,
    )

    # The entry is read as: [0] and [1] sequence records (with .id and .seq),
    # [2] the list of alignments, [3] the score.
    entry = result[0][0]
    first_record, second_record = entry[0], entry[1]

    assert first_record.id == "idA"
    assert second_record.id == "idB"
    assert str(first_record.seq) == "TCCGA"
    assert str(second_record.seq) == "TACGCAGA"
    assert entry[3] == -4
    assert len(entry[2]) == 3
    assert entry[2][0] == ('T___CCGA', 'TACGCAGA')
Exemplo n.º 7
0
def test_too_few_arguments():
    """Negative test: calling ``run`` with too few arguments must fail.

    ``run`` is invoked with one argument fewer than the other tests in this
    group pass, which is expected to raise ``TypeError``.
    """
    gt = Gotoh()
    seq_fasta_1 = os.path.join('data', 'sequences', 'seq1.fasta')
    seq_fasta_2 = os.path.join('data', 'sequences', 'seq2.fasta')

    # Expect the TypeError directly rather than tracking it with a flag. Any
    # other outcome (no exception, or SystemExit) still fails the test.
    with pytest.raises(TypeError):
        gt.run(seq_fasta_1,
               seq_fasta_2,
               'pam250',
               -1,
               False)
 def test_init(self):
     """Check the initial D, P and Q matrices for two length-4 sequences.

     After ``init_scoring_matrices("AAAC", "AAAC")`` each matrix is 5 x 5.
     D must carry 0, -12, -13, -14, -15 along its first row and first column;
     P and Q must carry -inf there. All interior cells must be 0.
     """
     aligner = Gotoh()
     aligner.init_scoring_matrices("AAAC", "AAAC")
     print(aligner.scoring_matrix_D)
     print(aligner.scoring_matrix_P)
     print(aligner.scoring_matrix_Q)

     # Expected D: zero interior, border values on the first row and column.
     border = np.array([0., -12., -13., -14., -15.])
     expected_d = np.zeros((5, 5))
     expected_d[0, :] = border
     expected_d[:, 0] = border

     # Expected P and Q share one layout: -inf on the first row and column.
     expected_gap = np.zeros((5, 5))
     expected_gap[0, :] = -math.inf
     expected_gap[:, 0] = -math.inf

     assert np.array_equal(aligner.scoring_matrix_D, expected_d)
     assert np.array_equal(aligner.scoring_matrix_P, expected_gap)
     assert np.array_equal(aligner.scoring_matrix_Q, expected_gap)
Exemplo n.º 9
0
def test_example_success():
    """Positive test: run Gotoh on seq1/seq2 and check the full result tuple.

    Uses the 'pam250' matrix with gap arguments -11 and -1. The result is a
    7-tuple of both ids, both sequences, the score, the list of alignments
    and the number of alignments.
    """
    gt = Gotoh()
    seq_fasta_1 = os.path.join('data', 'sequences', 'seq1.fasta')
    seq_fasta_2 = os.path.join('data', 'sequences', 'seq2.fasta')
    result = gt.run(seq_fasta_1,
                    seq_fasta_2,
                    'pam250',
                    -11,
                    -1,
                    True)
    (id_seq1, seq1, id_seq2, seq2, score, alignments, num_alignments) = result

    assert id_seq1 == "ID1"
    assert id_seq2 == "ID2"
    assert seq1 == "ILDMDVVEGSAARFDCKVEGYPDPEVMWFKDDNPVKESRHFQIDYDEEGN"
    assert seq2 == "RDPVKTHEGWGVMLPCNPPAHYPGLSYRWLLNEFPNFIPTDGRHFVSQTT"
    assert score == 33
    # Each alignment is [aligned seq1, aligned seq2, per-column marker line].
    assert alignments == [
        ['ILDMDVVEGSAARFDCKVEG-YPDPEVMWFKDDNP---VKESRHFQIDYDEEGN',
         'RDPVKTHEGWGVMLPCNPPAHYPGLSYRWLLNEFPNFIPTDGRHFV----SQTT',
         ':::::::**::::::*:::: **:::::*:::::*   ::::***:    ::::'],
        ['ILDMDVVEGSAARFDCKVEG-YPDPEVMWFKDDNP---VKESRHFQIDYDEEGN',
         'RDPVKTHEGWGVMLPCNPPAHYPGLSYRWLLNEFPNFIPTDGRHF----VSQTT',
         ':::::::**::::::*:::: **:::::*:::::*   ::::***    :::::'],
    ]
    assert num_alignments == 2
    def test_scoring_blossum(self):
        """Check the D matrix for BLOSUM62 with gap_penalty=-11, gap_extend=-1.

        Cases 1 and 2 compare the whole matrix for tiny inputs. Case 3 checks
        only the bottom-right cell for a pair of long sequences.
        """
        print("######### Testing calculation of scoring matrix. ###########")
        aligner = Gotoh(substitution_matrix=MatrixInfo.blosum62,
                        gap_penalty=-11,
                        gap_extend=-1)

        # Case 1: single identical residues.
        print("############# Case 1 ##############")
        print("############# START ##############")
        first, second = "A", "A"
        aligner.calculate_scoring_matrix(first, second)
        print("SEQ1: %s" % first)
        print("SEQ2: %s" % second)
        print("RESULT:\n %s" % aligner.scoring_matrix_D)
        expected = np.array([[0., -12.0], [-12., 4.]])
        np.testing.assert_array_equal(aligner.scoring_matrix_D, expected)
        print("############# FINISH ##############")

        # Case 2: second sequence is one residue longer.
        print("############# Case 2 ##############")
        print("############# START ##############")
        first, second = "A", "AT"
        aligner.calculate_scoring_matrix(first, second)
        print("SEQ1: %s" % first)
        print("SEQ2: %s" % second)
        print("RESULT:\n %s" % aligner.scoring_matrix_D)
        expected = np.array([[0., -12., -13.], [-12., 4., -8.]])
        np.testing.assert_array_equal(aligner.scoring_matrix_D, expected)
        print("############# FINISH ##############")

        # Case 3: long sequences, only the final cell is checked.
        print("############# Case 3 ##############")
        print("############# START ##############")
        first = "RDPVKTHEGWGVMLPCNPPAHYPGLSYRWLLNEFPNFIPTDGRHFVSQTT"
        second = "ISDTEADIGSNLRWGCAAAGKPRPMVRWLRNGEPLASQNRVEVLA"
        aligner.calculate_scoring_matrix(first, second)
        print("SEQ1: %s" % first)
        print("SEQ2: %s" % second)
        print("RESULT:\n %s" % pprint.pformat(aligner.scoring_matrix_D))
        final_cell = aligner.scoring_matrix_D[-1][-1]
        self.assertAlmostEqual(final_cell, -5.0)
        print("############# FINISH ##############")
    def test_scoring_pam(self):
        """Check the D matrix for PAM250 with gap_penalty=-11, gap_extend=-1.

        Cases 1 and 2 compare the whole matrix for tiny inputs. Case 3 checks
        only the bottom-right cell for a pair of long sequences.
        """
        print("######### Testing calculation of scoring matrix. ###########")
        aligner = Gotoh(substitution_matrix=MatrixInfo.pam250,
                        gap_penalty=-11,
                        gap_extend=-1)

        # Case 1: single identical residues.
        print("############# Case 1 ##############")
        print("############# START ##############")
        first, second = "A", "A"
        aligner.calculate_scoring_matrix(first, second)
        print("SEQ1: %s" % first)
        print("SEQ2: %s" % second)
        print("RESULT:\n %s" % aligner.scoring_matrix_D)
        expected = np.array([[0., -12.], [-12., 2.]])
        np.testing.assert_array_equal(aligner.scoring_matrix_D, expected)
        print("############# FINISH ##############")

        # Case 2: second sequence is one residue longer.
        print("############# Case 2 ##############")
        print("############# START ##############")
        first, second = "A", "AT"
        aligner.calculate_scoring_matrix(first, second)
        print("SEQ1: %s" % first)
        print("SEQ2: %s" % second)
        print("RESULT:\n %s" % aligner.scoring_matrix_D)
        expected = np.array([[0., -12., -13.], [-12., 2., -10.]])
        np.testing.assert_array_equal(aligner.scoring_matrix_D, expected)
        print("############# FINISH ##############")

        # Case 3: long sequences, only the final cell is checked.
        print("############# Case 3 ##############")
        print("############# START ##############")
        first = "ILDMDVVEGSAARFDCKVEGYPDPEVMWFKDDNPVKESRHFQIDYDEEGN"
        second = "RDPVKTHEGWGVMLPCNPPAHYPGLSYRWLLNEFPNFIPTDGRHFVSQTT"
        aligner.calculate_scoring_matrix(first, second)
        print("SEQ1: %s" % first)
        print("SEQ2: %s" % second)
        print("RESULT:\n %s" % pprint.pformat(aligner.scoring_matrix_D))
        final_cell = aligner.scoring_matrix_D[-1][-1]
        self.assertAlmostEqual(final_cell, 33)
        print("############# FINISH ##############")
    def let_compute(self, input):
        """
        Starts the algorithm with the right input parameters.

        Args:
            input: console input; its first element selects the algorithm and
                the whole object is forwarded to the procedure's compute().

        Raises:
            ValueError: if the first element matches none of the supported
                algorithms.
        """
        algorithm = input[0]

        if algorithm == available_algorithms.GOTOH:
            procedure = Gotoh()
        elif algorithm == available_algorithms.NEEDLEMAN_WUNSCH:
            procedure = NeedlemanWunsch()
        elif algorithm == available_algorithms.NEEDLEMAN_WUNSCH_3D:
            procedure = NeedlemanWunsch3()
        elif algorithm == available_algorithms.NUSSINOV:
            procedure = Nussinov()
        else:
            # Without this branch `procedure` stays unbound and the call below
            # fails with an UnboundLocalError that hides the real cause.
            raise ValueError("Unknown algorithm: %r" % (algorithm,))

        procedure.compute(input)
def test_instance():
    """Verify that Gotoh derives from GotohBase, as a class and as an instance."""
    assert issubclass(Gotoh, GotohBase)
    gotoh_instance = Gotoh()
    assert isinstance(gotoh_instance, GotohBase)