Пример #1
0
def test_create_viterbi():
    """Decode "NCD" with a hand-built two-state HMM and expect "HHF".

    The first statement fails the test on purpose: per the original note,
    the HMM cannot be initialised in this way and the test still needs to
    be fixed. The remaining statements are kept as the blueprint for the
    repaired test.
    """
    assert 0, "Test not fixed"
    my_hmm = HMM()
    my_hmm.states = ['H', 'F']

    # Initialisation
    my_hmm.p_0 = {"H": 0.6, "F": 0.4}

    # Feed values to p_t, one entry per state
    my_hmm.p_t["H"] = {"H": 0.7, "F": 0.3}
    my_hmm.p_t["F"] = {"H": 0.4, "F": 0.6}

    # Emissions
    my_hmm.emissions = ["D", "C", "N"]

    # Emission probabilities, one entry per state
    my_hmm.p_e['H'] = {"D": 0.1, "C": 0.4, "N": 0.5}
    my_hmm.p_e['F'] = {"D": 0.6, "C": 0.3, "N": 0.1}

    my_viterbi = Viterbi(my_hmm, "NCD")
    # Assert rather than print: a printed "Test FAILED" would still count
    # as a passing test for the test runner.
    assert my_viterbi.viterbi() == "HHF", "Test FAILED"
Пример #2
0
def test_create_viterbi():
    """Decode "NCD" with a hand-built two-state HMM and expect "HHF".

    The first statement fails the test on purpose: per the original note,
    the HMM cannot be initialised in this way and the test still needs to
    be fixed. The remaining statements are kept as the blueprint for the
    repaired test.
    """
    assert 0, "Test not fixed"
    my_hmm = HMM()
    my_hmm.states = ['H', 'F']

    # Initialisation
    my_hmm.p_0 = {"H": 0.6, "F": 0.4}

    # Feed values to p_t, one entry per state
    my_hmm.p_t["H"] = {"H": 0.7, "F": 0.3}
    my_hmm.p_t["F"] = {"H": 0.4, "F": 0.6}

    # Emissions
    my_hmm.emissions = ["D", "C", "N"]

    # Emission probabilities, one entry per state
    my_hmm.p_e['H'] = {"D": 0.1, "C": 0.4, "N": 0.5}
    my_hmm.p_e['F'] = {"D": 0.6, "C": 0.3, "N": 0.1}

    my_viterbi = Viterbi(my_hmm, "NCD")
    # Assert rather than print: a printed "Test FAILED" would still count
    # as a passing test for the test runner.
    assert my_viterbi.viterbi() == "HHF", "Test FAILED"
Пример #3
0
def test_hmm_pickle():
    """Round-trip an HMM through a pickle file and compare both objects.

    An HMM is created from ``TEST_REPEAT_MSA_DOUBLE``, written to
    ``test.pickle`` under ``path()`` and read back. The ``hmmer`` and
    ``alphabet`` attributes of the original and the reloaded HMM must be
    equal.
    """
    test_repeat = repeat.Repeat(msa=TEST_REPEAT_MSA_DOUBLE)
    test_hmm = HMM.create(input_format='repeat', repeat=test_repeat)

    test_pickle = os.path.join(path(), "test.pickle")
    try:
        test_hmm.write(test_pickle, 'pickle')
        test_hmm_new = HMM.create(input_format='pickle', file=test_pickle)

        assert test_hmm.hmmer == test_hmm_new.hmmer
        assert test_hmm.alphabet == test_hmm_new.alphabet
    finally:
        # Remove the file even when a step above fails, so a broken run
        # does not leave test.pickle behind.
        if os.path.exists(test_pickle):
            os.remove(test_pickle)
Пример #4
0
def test_hmm_pickle():
    """Round-trip an HMM through a pickle file and compare both objects.

    An HMM is created from ``TEST_REPEAT_MSA_DOUBLE``, written to
    ``test.pickle`` under ``path()`` and read back. The ``hmmer`` and
    ``alphabet`` attributes of the original and the reloaded HMM must be
    equal.
    """
    test_repeat = repeat.Repeat(msa=TEST_REPEAT_MSA_DOUBLE)
    test_hmm = HMM.create(input_format='repeat', repeat=test_repeat)

    test_pickle = os.path.join(path(), "test.pickle")
    try:
        test_hmm.write(test_pickle, 'pickle')
        test_hmm_new = HMM.create(input_format='pickle', file=test_pickle)

        assert test_hmm.hmmer == test_hmm_new.hmmer
        assert test_hmm.alphabet == test_hmm_new.alphabet
    finally:
        # Remove the file even when a step above fails, so a broken run
        # does not leave test.pickle behind.
        if os.path.exists(test_pickle):
            os.remove(test_pickle)
Пример #5
0
def test_too_big_hmms():
    """Check that detection with the long HMMs yields an empty RepeatList.

    Two (repeat MSA, sequence) pairs are tried; for each, the detection
    result must be a ``RepeatList`` containing no repeats.
    """
    cases = (
        (TEST_RESULT_REPEAT_MSA_LONG, TEST_SEQUENCE_A),
        (TEST_RESULT_REPEAT_MSA_SUPER_LONG, TEST_SEQUENCE_SUPER_LONG_A),
    )

    for msa, raw_sequence in cases:
        hmm = HMM.create(input_format='repeat', repeat=repeat.Repeat(msa=msa))
        detected = sequence.Sequence(raw_sequence).detect([hmm])

        assert type(detected) == repeat_list.RepeatList
        assert len(detected.repeats) == 0
Пример #6
0
def test_too_big_hmms():
    """Check that detection with the long HMMs yields an empty RepeatList.

    Two (repeat MSA, sequence) pairs are tried; for each, the detection
    result must be a ``RepeatList`` containing no repeats.
    """
    cases = (
        (TEST_RESULT_REPEAT_MSA_LONG, TEST_SEQUENCE_A),
        (TEST_RESULT_REPEAT_MSA_SUPER_LONG, TEST_SEQUENCE_SUPER_LONG_A),
    )

    for msa, raw_sequence in cases:
        hmm = HMM.create(input_format='repeat', repeat=repeat.Repeat(msa=msa))
        detected = sequence.Sequence(raw_sequence).detect([hmm])

        assert type(detected) == repeat_list.RepeatList
        assert len(detected.repeats) == 0
Пример #7
0
def test_viterbi():
    """Check Viterbi paths and the repeat units derived from them.

    For each case an HMM is built from the original MSA, the sequence is
    decoded with ``viterbi`` and the path is compared with the expected
    one. The path is then converted to unaligned repeat units and
    realigned; both results must equal the expected refined MSA.
    """
    # name -> [original TR MSA, sequence, expected path, expected refined MSA]
    cases = {
        "Single": [
            ["A", "A", "A"],
            "AAAAAA",
            ["M1", "M1", "M1", "M1", "M1", "M1"],
            ["A", "A", "A", "A", "A", "A"],
        ],
        "Double": [
            ["AA", "AA"],
            "AAAAAA",
            ["M1", "M2", "M1", "M2", "M1", "M2"],
            ["AA", "AA", "AA"],
        ],
        "Long": [
            ["ADKL", "ADKL"],
            "GYRADKLADKLADKL",
            ["N", "N", "N",
             "M1", "M2", "M3", "M4",
             "M1", "M2", "M3", "M4",
             "M1", "M2", "M3", "M4"],
            ["ADKL", "ADKL", "ADKL"],
        ],
    }

    for original_msa, seq, expected_path, expected_msa in cases.values():
        hmm = HMM.create(input_format="repeat",
                         repeat=repeat.Repeat(msa=original_msa))

        # Decode the sequence with the HMM using the Viterbi algorithm.
        decoded_path = hmm.viterbi(seq)
        assert type(decoded_path) == list
        assert decoded_path == expected_path

        units = hmm_path_to_non_aligned_tandem_repeat_units(
            seq, decoded_path, hmm.l_effective)
        assert units == expected_msa

        realigned = repeat_align.realign_repeat(units)
        assert realigned == expected_msa
Пример #8
0
def test_create_HMM_from_Repeat():
    """Build HMMs from the double and single repeat MSAs and check them.

    Verifies ``l_effective``, ``states`` and ``p_0`` against the expected
    constants. ``p_t`` is not checked.
    """
    hmm_double = HMM.create(
        input_format='repeat',
        repeat=repeat.Repeat(msa=TEST_REPEAT_MSA_DOUBLE),
    )
    assert hmm_double.l_effective == 2
    # States of the double HMM are compared as sets, i.e. ignoring order.
    assert set(hmm_double.states) == set(TEST_HMM_STATES_DOUBLE)
    assert hmm_double.p_0 == TEST_HMM_P0_DOUBLE

    hmm_single = HMM.create(
        input_format='repeat',
        repeat=repeat.Repeat(msa=TEST_REPEAT_MSA_SINGLE),
    )
    assert hmm_single.l_effective == 1
    # States of the single HMM are compared directly.
    assert hmm_single.states == TEST_HMM_STATES_SINGLE
    assert hmm_single.p_0 == TEST_HMM_P0_SINGLE
Пример #9
0
def test_sequence_pickle():
    """Round-trip a Sequence through a pickle file, with and without repeats.

    The sequence is first pickled and reloaded on its own and ``seq`` is
    compared. Repeats are then detected with an HMM built from
    ``TEST_REPEAT_MSA_DOUBLE`` and stored under ``TEST_SEQUENCE_TAG``.
    After a second round trip the repeat-list keys and the MSA of the
    first repeat must match.
    """
    test_seq = sequence.Sequence(TEST_SEQUENCE)

    test_pickle = os.path.join(path(), "test.pickle")
    try:
        test_seq.write(test_pickle, 'pickle')
        test_seq_new = sequence.Sequence.create(test_pickle, 'pickle')

        assert test_seq.seq == test_seq_new.seq

        test_repeat = repeat.Repeat(msa=TEST_REPEAT_MSA_DOUBLE)
        test_hmm = HMM.create(input_format='repeat', repeat=test_repeat)
        test_optimized_repeat = test_seq.detect([test_hmm])
        test_seq.set_repeatlist(test_optimized_repeat, TEST_SEQUENCE_TAG)

        assert type(test_optimized_repeat) == repeat_list.RepeatList
        assert list(test_seq.d_repeatlist.keys()) == [TEST_SEQUENCE_TAG]
        assert type(
            test_seq.d_repeatlist[TEST_SEQUENCE_TAG]) == repeat_list.RepeatList
        # The stored repeat list must not be empty.
        assert test_seq.d_repeatlist[TEST_SEQUENCE_TAG].repeats

        test_retrieved_repeatlist = test_seq.get_repeatlist(TEST_SEQUENCE_TAG)
        assert test_retrieved_repeatlist == test_optimized_repeat

        test_seq.write(test_pickle, 'pickle')
        test_seq_new = sequence.Sequence.create(test_pickle, 'pickle')

        assert test_seq.d_repeatlist.keys() == test_seq_new.d_repeatlist.keys()
        original_repeats = test_seq.d_repeatlist[TEST_SEQUENCE_TAG].repeats
        reloaded_repeats = test_seq_new.d_repeatlist[TEST_SEQUENCE_TAG].repeats
        assert original_repeats[0].msa == reloaded_repeats[0].msa
    finally:
        # Remove the file even when a step above fails, so a broken run
        # does not leave test.pickle behind.
        if os.path.exists(test_pickle):
            os.remove(test_pickle)
Пример #10
0
def test_detect_repeats_with_repeat():
    """Detect repeats on TEST_SEQUENCE with HMMs built from repeat MSAs.

    Both the double and the single MSA must produce a ``RepeatList`` with
    exactly one repeat whose MSA equals the expected result constant.
    """

    def build_hmm(msa):
        # Create an HMM from a Repeat wrapping the given MSA.
        return HMM.create(input_format='repeat', repeat=repeat.Repeat(msa=msa))

    def check_single_hit(found, expected_msa):
        # The result must be a RepeatList with one repeat of the given MSA.
        assert type(found) == repeat_list.RepeatList
        assert len(found.repeats) == 1
        assert found.repeats[0].msa == expected_msa

    hmm = build_hmm(TEST_REPEAT_MSA_DOUBLE)
    seq = sequence.Sequence(TEST_SEQUENCE)
    check_single_hit(seq.detect([hmm]), TEST_RESULT_REPEAT_MSA_DOUBLE)

    hmm = build_hmm(TEST_REPEAT_MSA_SINGLE)
    check_single_hit(seq.detect([hmm]), TEST_RESULT_REPEAT_MSA_SINGLE)
Пример #11
0
def test_sequence_pickle():
    """Round-trip a Sequence through a pickle file, with and without repeats.

    The sequence is first pickled and reloaded on its own and ``seq`` is
    compared. Repeats are then detected with an HMM built from
    ``TEST_REPEAT_MSA_DOUBLE`` and stored under ``TEST_SEQUENCE_TAG``.
    After a second round trip the repeat-list keys and the MSA of the
    first repeat must match.
    """
    test_seq = sequence.Sequence(TEST_SEQUENCE)

    test_pickle = os.path.join(path(), "test.pickle")
    try:
        test_seq.write(test_pickle, 'pickle')
        test_seq_new = sequence.Sequence.create(test_pickle, 'pickle')

        assert test_seq.seq == test_seq_new.seq

        test_repeat = repeat.Repeat(msa=TEST_REPEAT_MSA_DOUBLE)
        test_hmm = HMM.create(input_format='repeat', repeat=test_repeat)
        test_optimized_repeat = test_seq.detect([test_hmm])
        test_seq.set_repeatlist(test_optimized_repeat, TEST_SEQUENCE_TAG)

        assert type(test_optimized_repeat) == repeat_list.RepeatList
        assert list(test_seq.d_repeatlist.keys()) == [TEST_SEQUENCE_TAG]
        assert type(
            test_seq.d_repeatlist[TEST_SEQUENCE_TAG]) == repeat_list.RepeatList
        # The stored repeat list must not be empty.
        assert test_seq.d_repeatlist[TEST_SEQUENCE_TAG].repeats

        test_retrieved_repeatlist = test_seq.get_repeatlist(TEST_SEQUENCE_TAG)
        assert test_retrieved_repeatlist == test_optimized_repeat

        test_seq.write(test_pickle, 'pickle')
        test_seq_new = sequence.Sequence.create(test_pickle, 'pickle')

        assert test_seq.d_repeatlist.keys() == test_seq_new.d_repeatlist.keys()
        original_repeats = test_seq.d_repeatlist[TEST_SEQUENCE_TAG].repeats
        reloaded_repeats = test_seq_new.d_repeatlist[TEST_SEQUENCE_TAG].repeats
        assert original_repeats[0].msa == reloaded_repeats[0].msa
    finally:
        # Remove the file even when a step above fails, so a broken run
        # does not leave test.pickle behind.
        if os.path.exists(test_pickle):
            os.remove(test_pickle)
Пример #12
0
def test_single_hmm_without_id_read(path):
    """Read the HMM file without an ID and check the single parsed entry.

    ``HMM.read`` must yield exactly one entry; it is checked with
    ``compare_carcinustatin`` and its ``ID_KEY_NAME`` value must be None.
    """
    hmm_file = os.path.join(path, TEST_FILE_WITHOUT_ID)
    parsed = list(HMM.read(hmm_file))
    assert len(parsed) == 1

    only_entry = parsed[0]
    compare_carcinustatin(only_entry)

    assert only_entry[ID_KEY_NAME] is None
Пример #13
0
def test_create_HMM_from_Repeat():
    """Build HMMs from the double and single repeat MSAs and check them.

    Verifies ``l_effective``, ``states`` and ``p_0`` against the expected
    constants. ``p_t`` is not checked.
    """
    hmm_double = HMM.create(
        input_format='repeat',
        repeat=repeat.Repeat(msa=TEST_REPEAT_MSA_DOUBLE),
    )
    assert hmm_double.l_effective == 2
    # States of the double HMM are compared as sets, i.e. ignoring order.
    assert set(hmm_double.states) == set(TEST_HMM_STATES_DOUBLE)
    assert hmm_double.p_0 == TEST_HMM_P0_DOUBLE

    hmm_single = HMM.create(
        input_format='repeat',
        repeat=repeat.Repeat(msa=TEST_REPEAT_MSA_SINGLE),
    )
    assert hmm_single.l_effective == 1
    # States of the single HMM are compared directly.
    assert hmm_single.states == TEST_HMM_STATES_SINGLE
    assert hmm_single.p_0 == TEST_HMM_P0_SINGLE
Пример #14
0
def test_single_hmm_without_id_read(path):
    """Read the HMM file without an ID and check the single parsed entry.

    ``HMM.read`` must yield exactly one entry; it is checked with
    ``compare_carcinustatin`` and its ``ID_KEY_NAME`` value must be None.
    """
    hmm_file = os.path.join(path, TEST_FILE_WITHOUT_ID)
    parsed = list(HMM.read(hmm_file))
    assert len(parsed) == 1

    only_entry = parsed[0]
    compare_carcinustatin(only_entry)

    assert only_entry[ID_KEY_NAME] is None
Пример #15
0
def test_detect_repeats_with_repeat():
    """Detect repeats on TEST_SEQUENCE with HMMs built from repeat MSAs.

    Both the double and the single MSA must produce a ``RepeatList`` with
    exactly one repeat whose MSA equals the expected result constant.
    """

    def build_hmm(msa):
        # Create an HMM from a Repeat wrapping the given MSA.
        return HMM.create(input_format='repeat', repeat=repeat.Repeat(msa=msa))

    def check_single_hit(found, expected_msa):
        # The result must be a RepeatList with one repeat of the given MSA.
        assert type(found) == repeat_list.RepeatList
        assert len(found.repeats) == 1
        assert found.repeats[0].msa == expected_msa

    hmm = build_hmm(TEST_REPEAT_MSA_DOUBLE)
    seq = sequence.Sequence(TEST_SEQUENCE)
    check_single_hit(seq.detect([hmm]), TEST_RESULT_REPEAT_MSA_DOUBLE)

    hmm = build_hmm(TEST_REPEAT_MSA_SINGLE)
    check_single_hit(seq.detect([hmm]), TEST_RESULT_REPEAT_MSA_SINGLE)
Пример #16
0
def test_init_with_repeat():
    """Run Viterbi with an HMM created from a DNA repeat (disabled).

    The first statement fails the test unconditionally; per the original
    note, this test still needs to be fixed.
    """
    assert 0, "Test not fixed"
    tandem_repeat = Repeat(
        begin=0,
        msa=['A-G', 'ACG', 'ACG'],
        sequence_type='DNA',
    )
    repeat_hmm = HMM.create(repeat=tandem_repeat)
    from . import sequence
    seq_obj = sequence.Sequence()
    decoder = Viterbi(repeat_hmm, seq_obj.sequence)
    decoded = decoder.viterbi()
    # The joined path is computed but not checked.
    "".join(decoded)
Пример #17
0
def test_init_with_repeat():
    """Run Viterbi with an HMM created from a DNA repeat (disabled).

    The first statement fails the test unconditionally; per the original
    note, this test still needs to be fixed.
    """
    assert 0, "Test not fixed"
    tandem_repeat = Repeat(
        begin=0,
        msa=['A-G', 'ACG', 'ACG'],
        sequence_type='DNA',
    )
    repeat_hmm = HMM.create(repeat=tandem_repeat)
    from . import sequence
    seq_obj = sequence.Sequence()
    decoder = Viterbi(repeat_hmm, seq_obj.sequence)
    decoded = decoder.viterbi()
    # The joined path is computed but not checked.
    "".join(decoded)
Пример #18
0
def test_viterbi():
    """Check Viterbi paths and the repeat units derived from them.

    For each case an HMM is built from the original MSA, the sequence is
    decoded with ``viterbi`` and the path is compared with the expected
    one. The path is then converted to unaligned repeat units and
    realigned; both results must equal the expected refined MSA.
    """
    # name -> [original TR MSA, sequence, expected path, expected refined MSA]
    cases = {
        "Single": [
            ["A", "A", "A"],
            "AAAAAA",
            ["M1", "M1", "M1", "M1", "M1", "M1"],
            ["A", "A", "A", "A", "A", "A"],
        ],
        "Double": [
            ["AA", "AA"],
            "AAAAAA",
            ["M1", "M2", "M1", "M2", "M1", "M2"],
            ["AA", "AA", "AA"],
        ],
        "Long": [
            ["ADKL", "ADKL"],
            "GYRADKLADKLADKL",
            ["N", "N", "N",
             "M1", "M2", "M3", "M4",
             "M1", "M2", "M3", "M4",
             "M1", "M2", "M3", "M4"],
            ["ADKL", "ADKL", "ADKL"],
        ],
    }

    for original_msa, seq, expected_path, expected_msa in cases.values():
        hmm = HMM.create(input_format="repeat",
                         repeat=repeat.Repeat(msa=original_msa))

        # Decode the sequence with the HMM using the Viterbi algorithm.
        decoded_path = hmm.viterbi(seq)
        assert type(decoded_path) == list
        assert decoded_path == expected_path

        units = hmm_path_to_non_aligned_tandem_repeat_units(
            seq, decoded_path, hmm.l_effective)
        assert units == expected_msa

        realigned = repeat_align.realign_repeat(units)
        assert realigned == expected_msa
Пример #19
0
def test_detect_repeats_with_hmm():
    """Run detection on TEST_SEQUENCE with an HMM loaded from a HMMER file.

    The test only checks that the calls complete; the detection result is
    not inspected.
    """
    hmm_file = os.path.join(path(), TEST_FILE_WITH_ID)
    hmm = HMM.create(input_format='hmmer', file=hmm_file)
    seq = sequence.Sequence(TEST_SEQUENCE)
    seq.detect([hmm])
Пример #20
0
def test_single_hmm_no_id_with_query_read(path):
    """Query the ID-less HMM file by ID and expect no entries back."""
    hmm_file = os.path.join(path, TEST_FILE_WITHOUT_ID)
    matches = list(HMM.read(hmm_file, id=CARCINUSTATIN_ID))
    assert len(matches) == 0
Пример #21
0
def test_detect_repeats_with_hmm():
    """Run detection on TEST_SEQUENCE with an HMM loaded from a HMMER file.

    The test only checks that the calls complete; the detection result is
    not inspected.
    """
    hmm_file = os.path.join(path(), TEST_FILE_WITH_ID)
    hmm = HMM.create(input_format='hmmer', file=hmm_file)
    seq = sequence.Sequence(TEST_SEQUENCE)
    seq.detect([hmm])
Пример #22
0
def test_single_hmm_with_wrong_query_read(path):
    """Query the HMM file with a non-matching ID and expect no entries."""
    hmm_file = os.path.join(path, TEST_FILE_WITH_ID)
    matches = list(HMM.read(hmm_file, id=WRONG_CARCINUSTATIN_ID))
    assert len(matches) == 0
Пример #23
0
def test_single_hmm_with_wrong_query_read(path):
    """Query the HMM file with a non-matching ID and expect no entries."""
    hmm_file = os.path.join(path, TEST_FILE_WITH_ID)
    matches = list(HMM.read(hmm_file, id=WRONG_CARCINUSTATIN_ID))
    assert len(matches) == 0
Пример #24
0
def test_single_hmm_no_id_with_query_read(path):
    """Query the ID-less HMM file by ID and expect no entries back."""
    hmm_file = os.path.join(path, TEST_FILE_WITHOUT_ID)
    matches = list(HMM.read(hmm_file, id=CARCINUSTATIN_ID))
    assert len(matches) == 0