def test_create_viterbi():
    """Decode "NCD" with a hand-built two-state HMM (currently disabled).

    This test needs to be fixed: the HMM cannot be initialised attribute by
    attribute as done below. It therefore fails on purpose at its first
    statement; the rest is kept as a sketch of the intended check.
    """
    assert 0, "Test not fixed"

    model = HMM()
    model.states = ['H', 'F']

    # Initial state probabilities.
    model.p_0 = {"H": 0.6, "F": 0.4}

    # Transition probabilities, one row per state.
    transition_rows = (
        ("H", {"H": 0.7, "F": 0.3}),
        ("F", {"H": 0.4, "F": 0.6}),
    )
    for state, row in transition_rows:
        model.p_t[state] = row

    # Emission alphabet and the per-state emission probabilities.
    model.emissions = ["D", "C", "N"]
    emission_rows = (
        ('H', {"D": 0.1, "C": 0.4, "N": 0.5}),
        ('F', {"D": 0.6, "C": 0.3, "N": 0.1}),
    )
    for state, row in emission_rows:
        model.p_e[state] = row

    decoder = Viterbi(model, "NCD")
    print("Test SUCCESSFUL" if decoder.viterbi() == "HHF" else "Test FAILED")
def test_create_viterbi():
    """Run Viterbi on a manually assembled HMM (currently disabled).

    This test needs to be fixed, because the HMM cannot be initialised in
    the way shown below. The leading assertion makes it fail on purpose.
    """
    assert 0, "Test not fixed"

    hmm = HMM()
    hmm.states = ['H', 'F']

    # Initial state probabilities.
    hmm.p_0 = {"H": 0.6, "F": 0.4}

    # Transition probabilities.
    hmm.p_t["H"] = {"H": 0.7, "F": 0.3}
    hmm.p_t["F"] = {"H": 0.4, "F": 0.6}

    # Emitted symbols and their probabilities for each state.
    hmm.emissions = ["D", "C", "N"]
    hmm.p_e['H'] = {"D": 0.1, "C": 0.4, "N": 0.5}
    hmm.p_e['F'] = {"D": 0.6, "C": 0.3, "N": 0.1}

    decoded = Viterbi(hmm, "NCD").viterbi()
    outcome = "Test SUCCESSFUL" if decoded == "HHF" else "Test FAILED"
    print(outcome)
def test_hmm_pickle():
    """Round-trip an HMM through a pickle file and compare both copies.

    An HMM is created from ``TEST_REPEAT_MSA_DOUBLE``, written to
    ``test.pickle``, read back, and its ``hmmer`` and ``alphabet``
    attributes are compared with the original's. The pickle file is removed
    in a ``finally`` clause so a failing assertion does not leave it behind.
    """
    test_repeat = repeat.Repeat(msa=TEST_REPEAT_MSA_DOUBLE)
    test_hmm = HMM.create(input_format='repeat', repeat=test_repeat)

    # NOTE(review): `path` is called here like a plain function; if it is a
    # pytest fixture it cannot be called directly -- confirm.
    test_pickle = os.path.join(path(), "test.pickle")
    try:
        test_hmm.write(test_pickle, 'pickle')
        test_hmm_new = HMM.create(input_format='pickle', file=test_pickle)

        assert test_hmm.hmmer == test_hmm_new.hmmer
        assert test_hmm.alphabet == test_hmm_new.alphabet
    finally:
        # Clean up on success and on failure alike.
        if os.path.exists(test_pickle):
            os.remove(test_pickle)
def test_hmm_pickle():
    """Write an HMM to a pickle file, reload it and compare the two.

    The HMM is built from ``TEST_REPEAT_MSA_DOUBLE``. After the round trip
    the ``hmmer`` and ``alphabet`` attributes of both objects must be equal.
    The temporary pickle file is deleted even if an assertion fails.
    """
    test_hmm = HMM.create(
        input_format='repeat',
        repeat=repeat.Repeat(msa=TEST_REPEAT_MSA_DOUBLE),
    )

    # NOTE(review): `path` is called here like a plain function; if it is a
    # pytest fixture it cannot be called directly -- confirm.
    test_pickle = os.path.join(path(), "test.pickle")
    try:
        test_hmm.write(test_pickle, 'pickle')
        test_hmm_new = HMM.create(input_format='pickle', file=test_pickle)

        assert test_hmm.hmmer == test_hmm_new.hmmer
        assert test_hmm.alphabet == test_hmm_new.alphabet
    finally:
        # Runs on failure too, so no stale file is left for later runs.
        if os.path.exists(test_pickle):
            os.remove(test_pickle)
def test_too_big_hmms():
    """Check that detection with the long repeat HMMs returns no repeats.

    For each (repeat MSA, sequence) pair an HMM is created from the MSA and
    used for detection on the sequence. The result must be a ``RepeatList``
    whose ``repeats`` is empty.
    """
    cases = (
        (TEST_RESULT_REPEAT_MSA_LONG, TEST_SEQUENCE_A),
        (TEST_RESULT_REPEAT_MSA_SUPER_LONG, TEST_SEQUENCE_SUPER_LONG_A),
    )
    for msa, raw_sequence in cases:
        test_repeat = repeat.Repeat(msa=msa)
        test_hmm = HMM.create(input_format='repeat', repeat=test_repeat)
        test_seq = sequence.Sequence(raw_sequence)

        test_optimized_repeat = test_seq.detect([test_hmm])

        # isinstance() is the idiomatic type check (type(x) == T is E721).
        assert isinstance(test_optimized_repeat, repeat_list.RepeatList)
        assert len(test_optimized_repeat.repeats) == 0
def test_too_big_hmms():
    """Detection with HMMs built from the long MSAs must find no repeats.

    Two cases are run: ``TEST_RESULT_REPEAT_MSA_LONG`` against
    ``TEST_SEQUENCE_A`` and ``TEST_RESULT_REPEAT_MSA_SUPER_LONG`` against
    ``TEST_SEQUENCE_SUPER_LONG_A``. Each must yield a ``RepeatList`` with
    zero repeats.
    """
    cases = (
        (TEST_RESULT_REPEAT_MSA_LONG, TEST_SEQUENCE_A),
        (TEST_RESULT_REPEAT_MSA_SUPER_LONG, TEST_SEQUENCE_SUPER_LONG_A),
    )
    for msa, raw_sequence in cases:
        test_hmm = HMM.create(
            input_format='repeat',
            repeat=repeat.Repeat(msa=msa),
        )
        test_optimized_repeat = sequence.Sequence(raw_sequence).detect(
            [test_hmm])

        # isinstance() instead of comparing type objects with ==.
        assert isinstance(test_optimized_repeat, repeat_list.RepeatList)
        assert len(test_optimized_repeat.repeats) == 0
def test_viterbi():
    """Check Viterbi paths and the repeat units derived from them.

    Every case supplies the MSA the HMM is built from, the sequence to
    decode, the expected Viterbi path and the expected repeat units. The
    units are checked both before and after realignment. Each assertion
    carries the case name so a failure identifies the case.
    """
    # {case name: (original TR MSA, sequence, Viterbi path, refined TR MSA)}
    cases = {
        "Single": (
            ["A", "A", "A"],
            "AAAAAA",
            ["M1", "M1", "M1", "M1", "M1", "M1"],
            ["A", "A", "A", "A", "A", "A"],
        ),
        "Double": (
            ["AA", "AA"],
            "AAAAAA",
            ["M1", "M2", "M1", "M2", "M1", "M2"],
            ["AA", "AA", "AA"],
        ),
        "Long": (
            ["ADKL", "ADKL"],
            "GYRADKLADKLADKL",
            [
                "N", "N", "N",
                "M1", "M2", "M3", "M4",
                "M1", "M2", "M3", "M4",
                "M1", "M2", "M3", "M4",
            ],
            ["ADKL", "ADKL", "ADKL"],
        ),
    }

    for name, (msa, seq, expected_path, expected_msa) in cases.items():
        test_repeat = repeat.Repeat(msa=msa)
        test_hmm = HMM.create(input_format="repeat", repeat=test_repeat)

        # Decode the sequence with the Viterbi algorithm.
        most_likely_path = test_hmm.viterbi(seq)
        assert isinstance(most_likely_path, list), name
        assert most_likely_path == expected_path, name

        unaligned_msa = hmm_path_to_non_aligned_tandem_repeat_units(
            seq, most_likely_path, test_hmm.l_effective)
        assert unaligned_msa == expected_msa, name

        aligned_msa = repeat_align.realign_repeat(unaligned_msa)
        assert aligned_msa == expected_msa, name
def test_create_HMM_from_Repeat():
    """Build HMMs from repeats and check their basic attributes.

    Covers ``TEST_REPEAT_MSA_DOUBLE`` (expected ``l_effective`` of 2) and
    ``TEST_REPEAT_MSA_SINGLE`` (expected ``l_effective`` of 1), checking the
    states and the initial probabilities ``p_0`` of each HMM.
    """
    double_hmm = HMM.create(
        input_format='repeat',
        repeat=repeat.Repeat(msa=TEST_REPEAT_MSA_DOUBLE),
    )
    assert double_hmm.l_effective == 2
    # Compared as sets: only membership is checked here, not order.
    assert set(double_hmm.states) == set(TEST_HMM_STATES_DOUBLE)
    assert double_hmm.p_0 == TEST_HMM_P0_DOUBLE
    # NOTE: the transition probabilities (p_t) are not asserted here.

    single_hmm = HMM.create(
        input_format='repeat',
        repeat=repeat.Repeat(msa=TEST_REPEAT_MSA_SINGLE),
    )
    assert single_hmm.l_effective == 1
    assert single_hmm.states == TEST_HMM_STATES_SINGLE
    assert single_hmm.p_0 == TEST_HMM_P0_SINGLE
def test_sequence_pickle():
    """Round-trip a Sequence through a pickle file, with and without repeats.

    First the bare sequence is written and reloaded and ``seq`` is compared.
    Then repeats are detected with an HMM built from
    ``TEST_REPEAT_MSA_DOUBLE``, stored under ``TEST_SEQUENCE_TAG``, and the
    sequence is pickled again; the reloaded object must have the same
    repeat-list keys and the same MSA for the first repeat. The pickle file
    is removed in a ``finally`` clause so a failing assertion does not leave
    it behind.
    """
    test_seq = sequence.Sequence(TEST_SEQUENCE)

    # NOTE(review): `path` is called here like a plain function; if it is a
    # pytest fixture it cannot be called directly -- confirm.
    test_pickle = os.path.join(path(), "test.pickle")
    try:
        test_seq.write(test_pickle, 'pickle')
        test_seq_new = sequence.Sequence.create(test_pickle, 'pickle')
        assert test_seq.seq == test_seq_new.seq

        test_repeat = repeat.Repeat(msa=TEST_REPEAT_MSA_DOUBLE)
        test_hmm = HMM.create(input_format='repeat', repeat=test_repeat)
        test_optimized_repeat = test_seq.detect([test_hmm])
        test_seq.set_repeatlist(test_optimized_repeat, TEST_SEQUENCE_TAG)

        # isinstance() is the idiomatic type check (type(x) == T is E721).
        assert isinstance(test_optimized_repeat, repeat_list.RepeatList)
        assert list(test_seq.d_repeatlist.keys()) == [TEST_SEQUENCE_TAG]
        assert isinstance(
            test_seq.d_repeatlist[TEST_SEQUENCE_TAG], repeat_list.RepeatList)
        assert test_seq.d_repeatlist[TEST_SEQUENCE_TAG].repeats

        test_retrieved_repeatlist = test_seq.get_repeatlist(TEST_SEQUENCE_TAG)
        assert test_retrieved_repeatlist == test_optimized_repeat

        # Second round trip, now with the repeat list attached.
        test_seq.write(test_pickle, 'pickle')
        test_seq_new = sequence.Sequence.create(test_pickle, 'pickle')

        assert test_seq.d_repeatlist.keys() == test_seq_new.d_repeatlist.keys()
        original_msa = test_seq.d_repeatlist[TEST_SEQUENCE_TAG].repeats[0].msa
        reloaded_msa = (
            test_seq_new.d_repeatlist[TEST_SEQUENCE_TAG].repeats[0].msa)
        assert original_msa == reloaded_msa
    finally:
        # Clean up on success and on failure alike.
        if os.path.exists(test_pickle):
            os.remove(test_pickle)
def test_detect_repeats_with_repeat():
    """Detect repeats on ``TEST_SEQUENCE`` with HMMs built from repeats.

    For each input MSA an HMM is created and used for detection on the same
    sequence object. The result must be a ``RepeatList`` holding exactly one
    repeat whose MSA equals the expected one.
    """
    test_seq = sequence.Sequence(TEST_SEQUENCE)

    cases = (
        (TEST_REPEAT_MSA_DOUBLE, TEST_RESULT_REPEAT_MSA_DOUBLE),
        (TEST_REPEAT_MSA_SINGLE, TEST_RESULT_REPEAT_MSA_SINGLE),
    )
    for msa, expected_msa in cases:
        test_repeat = repeat.Repeat(msa=msa)
        test_hmm = HMM.create(input_format='repeat', repeat=test_repeat)

        test_optimized_repeat = test_seq.detect([test_hmm])

        # isinstance() is the idiomatic type check (type(x) == T is E721).
        assert isinstance(test_optimized_repeat, repeat_list.RepeatList)
        assert len(test_optimized_repeat.repeats) == 1
        assert test_optimized_repeat.repeats[0].msa == expected_msa
def test_sequence_pickle():
    """Pickle a Sequence, reload it and compare; repeat with a repeat list.

    The bare sequence is round-tripped first (``seq`` must match). Repeats
    are then detected with an HMM built from ``TEST_REPEAT_MSA_DOUBLE`` and
    attached under ``TEST_SEQUENCE_TAG``; after a second round trip the
    repeat-list keys and the first repeat's MSA must match. The pickle file
    is deleted even if an assertion fails.
    """
    test_seq = sequence.Sequence(TEST_SEQUENCE)

    # NOTE(review): `path` is called here like a plain function; if it is a
    # pytest fixture it cannot be called directly -- confirm.
    test_pickle = os.path.join(path(), "test.pickle")
    try:
        test_seq.write(test_pickle, 'pickle')
        test_seq_new = sequence.Sequence.create(test_pickle, 'pickle')
        assert test_seq.seq == test_seq_new.seq

        test_hmm = HMM.create(
            input_format='repeat',
            repeat=repeat.Repeat(msa=TEST_REPEAT_MSA_DOUBLE),
        )
        test_optimized_repeat = test_seq.detect([test_hmm])
        test_seq.set_repeatlist(test_optimized_repeat, TEST_SEQUENCE_TAG)

        # isinstance() instead of comparing type objects with ==.
        assert isinstance(test_optimized_repeat, repeat_list.RepeatList)
        assert list(test_seq.d_repeatlist.keys()) == [TEST_SEQUENCE_TAG]
        stored_repeatlist = test_seq.d_repeatlist[TEST_SEQUENCE_TAG]
        assert isinstance(stored_repeatlist, repeat_list.RepeatList)
        assert test_seq.d_repeatlist[TEST_SEQUENCE_TAG].repeats

        test_retrieved_repeatlist = test_seq.get_repeatlist(TEST_SEQUENCE_TAG)
        assert test_retrieved_repeatlist == test_optimized_repeat

        # Second round trip, this time including the repeat list.
        test_seq.write(test_pickle, 'pickle')
        test_seq_new = sequence.Sequence.create(test_pickle, 'pickle')

        assert test_seq.d_repeatlist.keys() == test_seq_new.d_repeatlist.keys()
        assert (
            test_seq.d_repeatlist[TEST_SEQUENCE_TAG].repeats[0].msa
            == test_seq_new.d_repeatlist[TEST_SEQUENCE_TAG].repeats[0].msa
        )
    finally:
        # Runs on failure too, so no stale file is left for later runs.
        if os.path.exists(test_pickle):
            os.remove(test_pickle)
def test_single_hmm_without_id_read(path):
    """Read ``TEST_FILE_WITHOUT_ID`` and check the single record it yields.

    Args:
        path: Directory that contains the test data file.
    """
    hmm_file = os.path.join(path, TEST_FILE_WITHOUT_ID)
    records = list(HMM.read(hmm_file))
    assert len(records) == 1

    record = records[0]
    compare_carcinustatin(record)
    # No ID is expected for this file's record.
    assert record[ID_KEY_NAME] is None
def test_create_HMM_from_Repeat():
    """Create HMMs from two repeats and verify their attributes.

    For ``TEST_REPEAT_MSA_DOUBLE`` the HMM must have ``l_effective`` 2, the
    expected set of states and the expected ``p_0``. For
    ``TEST_REPEAT_MSA_SINGLE`` it must have ``l_effective`` 1, the expected
    states and the expected ``p_0``.
    """
    # Double MSA.
    hmm_double = HMM.create(
        input_format='repeat',
        repeat=repeat.Repeat(msa=TEST_REPEAT_MSA_DOUBLE),
    )
    assert hmm_double.l_effective == 2
    # Compared as sets, so the order of the states does not matter here.
    assert set(hmm_double.states) == set(TEST_HMM_STATES_DOUBLE)
    assert hmm_double.p_0 == TEST_HMM_P0_DOUBLE
    # NOTE: the transition probabilities (p_t) are not asserted here.

    # Single MSA.
    hmm_single = HMM.create(
        input_format='repeat',
        repeat=repeat.Repeat(msa=TEST_REPEAT_MSA_SINGLE),
    )
    assert hmm_single.l_effective == 1
    assert hmm_single.states == TEST_HMM_STATES_SINGLE
    assert hmm_single.p_0 == TEST_HMM_P0_SINGLE
def test_detect_repeats_with_repeat():
    """Run detection on ``TEST_SEQUENCE`` with repeat-derived HMMs.

    Two HMMs are tried in turn, built from ``TEST_REPEAT_MSA_DOUBLE`` and
    ``TEST_REPEAT_MSA_SINGLE``. Each detection must return a ``RepeatList``
    with exactly one repeat, whose MSA equals the matching
    ``TEST_RESULT_REPEAT_MSA_*`` constant.
    """
    test_seq = sequence.Sequence(TEST_SEQUENCE)

    cases = (
        (TEST_REPEAT_MSA_DOUBLE, TEST_RESULT_REPEAT_MSA_DOUBLE),
        (TEST_REPEAT_MSA_SINGLE, TEST_RESULT_REPEAT_MSA_SINGLE),
    )
    for msa, expected_msa in cases:
        test_hmm = HMM.create(
            input_format='repeat',
            repeat=repeat.Repeat(msa=msa),
        )
        test_optimized_repeat = test_seq.detect([test_hmm])

        # isinstance() instead of comparing type objects with ==.
        assert isinstance(test_optimized_repeat, repeat_list.RepeatList)
        assert len(test_optimized_repeat.repeats) == 1
        assert test_optimized_repeat.repeats[0].msa == expected_msa
def test_init_with_repeat():
    """Decode a sequence with an HMM built from a DNA repeat (disabled).

    This test needs to be fixed. It fails on purpose at its first
    statement; the code after it is kept as a sketch of the intended check.
    """
    assert 0, "Test not fixed"

    tandem_repeat = Repeat(
        begin=0,
        msa=['A-G', 'ACG', 'ACG'],
        sequence_type='DNA',
    )
    hmm = HMM.create(repeat=tandem_repeat)

    from . import sequence
    target = sequence.Sequence()
    decoder = Viterbi(hmm, target.sequence)
    state_path = decoder.viterbi()
    # The joined string is discarded; there is no assertion on it.
    "".join(state_path)
def test_init_with_repeat():
    """Build an HMM from a DNA repeat and run Viterbi with it (disabled).

    This test needs to be fixed; the leading assertion makes it fail on
    purpose before any of the remaining code runs.
    """
    assert 0, "Test not fixed"

    dna_repeat = Repeat(begin=0, msa=['A-G', 'ACG', 'ACG'],
                        sequence_type='DNA')
    repeat_hmm = HMM.create(repeat=dna_repeat)

    from . import sequence
    empty_sequence = sequence.Sequence()
    viterbi_run = Viterbi(repeat_hmm, empty_sequence.sequence)
    decoded = viterbi_run.viterbi()
    # The result of the join is not used or asserted on.
    "".join(decoded)
def test_viterbi():
    """Verify Viterbi decoding and the tandem repeat units it implies.

    For every case an HMM is created from the case's MSA and the case's
    sequence is decoded. The path must be a list equal to the expected path,
    and the repeat units derived from it must equal the expected MSA both
    before and after realignment. The case name is attached to each
    assertion so a failure identifies the case.
    """
    # {case name: (original TR MSA, sequence, Viterbi path, refined TR MSA)}
    cases = {
        "Single": (
            ["A", "A", "A"],
            "AAAAAA",
            ["M1"] * 6,
            ["A"] * 6,
        ),
        "Double": (
            ["AA", "AA"],
            "AAAAAA",
            ["M1", "M2"] * 3,
            ["AA"] * 3,
        ),
        "Long": (
            ["ADKL", "ADKL"],
            "GYRADKLADKLADKL",
            ["N"] * 3 + ["M1", "M2", "M3", "M4"] * 3,
            ["ADKL"] * 3,
        ),
    }

    for name, (msa, seq, expected_path, expected_msa) in cases.items():
        test_hmm = HMM.create(
            input_format="repeat",
            repeat=repeat.Repeat(msa=msa),
        )

        # Decode the sequence with the Viterbi algorithm.
        most_likely_path = test_hmm.viterbi(seq)
        assert isinstance(most_likely_path, list), name
        assert most_likely_path == expected_path, name

        unaligned_msa = hmm_path_to_non_aligned_tandem_repeat_units(
            seq, most_likely_path, test_hmm.l_effective)
        assert unaligned_msa == expected_msa, name

        aligned_msa = repeat_align.realign_repeat(unaligned_msa)
        assert aligned_msa == expected_msa, name
def test_detect_repeats_with_hmm():
    """Smoke test: detect repeats with an HMM read in 'hmmer' format.

    The detection result is not asserted on; the test fails only if one of
    the calls raises.
    """
    # NOTE(review): `path` is called here like a plain function; if it is a
    # pytest fixture it cannot be called directly -- confirm.
    hmmer_file = os.path.join(path(), TEST_FILE_WITH_ID)
    hmm_from_file = HMM.create(input_format='hmmer', file=hmmer_file)

    target = sequence.Sequence(TEST_SEQUENCE)
    detected = target.detect([hmm_from_file])
def test_single_hmm_no_id_with_query_read(path):
    """Read ``TEST_FILE_WITHOUT_ID`` with an ``id`` query; expect no records.

    Args:
        path: Directory that contains the test data file.
    """
    hmm_file = os.path.join(path, TEST_FILE_WITHOUT_ID)
    records = list(HMM.read(hmm_file, id=CARCINUSTATIN_ID))
    assert not records
def test_detect_repeats_with_hmm():
    """Smoke test: run detection on ``TEST_SEQUENCE`` with a file-based HMM.

    The HMM is created from ``TEST_FILE_WITH_ID`` in 'hmmer' format. Nothing
    is asserted about the detection result.
    """
    # NOTE(review): `path` is called here like a plain function; if it is a
    # pytest fixture it cannot be called directly -- confirm.
    data_dir = path()
    file_hmm = HMM.create(
        input_format='hmmer',
        file=os.path.join(data_dir, TEST_FILE_WITH_ID),
    )
    query = sequence.Sequence(TEST_SEQUENCE)
    result = query.detect([file_hmm])
def test_single_hmm_with_wrong_query_read(path):
    """Read ``TEST_FILE_WITH_ID`` with a non-matching ID; expect no records.

    Args:
        path: Directory that contains the test data file.
    """
    hmm_file = os.path.join(path, TEST_FILE_WITH_ID)
    records = list(HMM.read(hmm_file, id=WRONG_CARCINUSTATIN_ID))
    assert not records
def test_single_hmm_with_wrong_query_read(path):
    """Query ``TEST_FILE_WITH_ID`` for ``WRONG_CARCINUSTATIN_ID``.

    The read must yield zero records.

    Args:
        path: Directory that contains the test data file.
    """
    source = os.path.join(path, TEST_FILE_WITH_ID)
    matches = [
        record for record in HMM.read(source, id=WRONG_CARCINUSTATIN_ID)
    ]
    assert len(matches) == 0
def test_single_hmm_no_id_with_query_read(path):
    """Query ``TEST_FILE_WITHOUT_ID`` for ``CARCINUSTATIN_ID``.

    The read must yield zero records.

    Args:
        path: Directory that contains the test data file.
    """
    source = os.path.join(path, TEST_FILE_WITHOUT_ID)
    matches = [record for record in HMM.read(source, id=CARCINUSTATIN_ID)]
    assert len(matches) == 0