def test_too_big_hmms():
    """Check that detection with the 'long' repeat HMMs finds no repeats.

    For each (repeat MSA, raw sequence) pair an HMM is built from the repeat
    and handed to ``Sequence.detect``. The result must be a ``Repeat_list``
    whose ``repeats`` collection is empty.

    NOTE(review): judging by the test and fixture names, the models are
    presumably too large for the sequences they are run on -- confirm against
    the fixture definitions.
    """
    cases = (
        (TEST_RESULT_REPEAT_MSA_LONG, TEST_SEQUENCE_A),
        (TEST_RESULT_REPEAT_MSA_SUPER_LONG, TEST_SEQUENCE_SUPER_LONG_A),
    )
    for msa, raw_sequence in cases:
        model = HMM.create(format='repeat', repeat=repeat.Repeat(msa=msa))
        target = sequence.Sequence(raw_sequence)
        detected = target.detect([model])
        # Exact type check: a subclass would not satisfy this assertion.
        assert type(detected) is repeat_list.Repeat_list
        assert len(detected.repeats) == 0
def test_sequence_pickle():
    """Round-trip a Sequence through a pickle file, with and without repeats.

    Steps:
      1. Write a fresh Sequence to ``test.pickle`` (in the directory returned
         by ``path()``), read it back and compare the ``seq`` attribute.
      2. Detect repeats with an HMM built from ``TEST_REPEAT_MSA_DOUBLE``,
         attach them under ``TEST_SEQUENCE_TAG`` and check the bookkeeping in
         ``dRepeat_list``.
      3. Write/read again and compare the tags and the MSA of the first repeat.

    The pickle file is removed in a ``finally`` clause so that a failing
    assertion does not leave the file behind for later test runs.
    """
    test_seq = sequence.Sequence(TEST_SEQUENCE)
    test_pickle = os.path.join(path(), "test.pickle")

    try:
        # Plain sequence round trip.
        test_seq.write(test_pickle, 'pickle')
        test_seq_new = sequence.Sequence.create(test_pickle, 'pickle')
        assert test_seq.seq == test_seq_new.seq

        # Attach detected repeats to the sequence under a tag.
        test_repeat = repeat.Repeat(msa=TEST_REPEAT_MSA_DOUBLE)
        test_hmm = HMM.create(format='repeat', repeat=test_repeat)
        test_optimized_repeat = test_seq.detect([test_hmm])
        test_seq.set_repeat_list(test_optimized_repeat, TEST_SEQUENCE_TAG)

        assert type(test_optimized_repeat) is repeat_list.Repeat_list
        assert list(test_seq.dRepeat_list.keys()) == [TEST_SEQUENCE_TAG]
        tagged = test_seq.dRepeat_list[TEST_SEQUENCE_TAG]
        assert type(tagged) is repeat_list.Repeat_list
        # At least one repeat must have been detected for the checks below.
        assert tagged.repeats

        # Round trip again, now with the repeat list attached.
        test_seq.write(test_pickle, 'pickle')
        test_seq_new = sequence.Sequence.create(test_pickle, 'pickle')

        assert test_seq.dRepeat_list.keys() == test_seq_new.dRepeat_list.keys()
        original_msa = test_seq.dRepeat_list[TEST_SEQUENCE_TAG].repeats[0].msa
        restored_msa = (
            test_seq_new.dRepeat_list[TEST_SEQUENCE_TAG].repeats[0].msa
        )
        assert original_msa == restored_msa
    finally:
        # Clean up even when an assertion above fails.
        if os.path.exists(test_pickle):
            os.remove(test_pickle)
def test_detect_repeats_denovo():
    """Run de novo detection restricted to the TRUST finder on Q9BRR0.

    ``Sequence.detect`` is called with ``denovo=True`` and a ``detection``
    keyword whose ``lFinders`` entry lists only ``"TRUST"``. The result must
    be a ``Repeat_list`` holding exactly three repeats.
    """
    finder_config = {"lFinders": ["TRUST"]}
    q9brr0 = sequence.Sequence(TEST_SEQUENCE_Q9BRR0)
    detected = q9brr0.detect(denovo=True, detection=finder_config)
    # Exact type check: a subclass would not satisfy this assertion.
    assert type(detected) is repeat_list.Repeat_list
    assert len(detected.repeats) == 3
def test_detect_XSTREAM():
    """Run ``run_TRD`` with the XSTREAM finder on Q9BRR0 and check its MSA.

    Exactly one repeat is expected for the single input record, and its
    ``msa`` must equal the alignment listed below.
    """
    expected_msa = [
        'ECGKSFAQS-SGLSK-HRRIHTGEKPYECE',
        'ECGKAFIGS-SALVI-HQRVHTGEKPYECE',
        'ECGKAFSHS-SDL-IKHQRTHTGEKPYECD',
        'DCGKTFSQSCSLLEH-H-RIHTGEKPY',
    ]
    q9brr0 = sequence.Sequence(TEST_SEQUENCE_Q9BRR0)
    per_record = repeat_detection_run.run_TRD(
        seq_records=[q9brr0],
        lFinders=["XSTREAM"],
    )
    # One entry per input record; each entry is keyed by finder name.
    xstream_hits = per_record[0]['XSTREAM']
    assert len(xstream_hits) == 1
    assert xstream_hits[0].msa == expected_msa
# NOTE(review): a second function with this exact name appears further down
# in this source. If both live in the same module, the later definition
# replaces this one and pytest never collects this test -- consider renaming.
def test_serialize_repeat_list_tsv():
    """Serialize a Repeat_list via ``rl_io.serialize_repeat_list_tsv``.

    Builds one ``Repeat`` per entry of ``TEST_REPEATS``, registers each on a
    Sequence with ``repeat_in_sequence``, wraps them in a ``Repeat_list`` and
    checks that the module-level serializer returns a ``str``.
    """
    built = []
    for msa in TEST_REPEATS:
        built.append(repeat.Repeat(msa=msa))

    host = sequence.Sequence(TEST_SEQUENCE)
    for tandem_repeat in built:
        host.repeat_in_sequence(tandem_repeat)

    collection = rl.Repeat_list(repeats=built)
    serialized = rl_io.serialize_repeat_list_tsv(collection)
    # Exact type check: a str subclass would not satisfy this assertion.
    assert type(serialized) is str
# NOTE(review): an earlier function in this source has the same name. If both
# live in the same module, this definition shadows the earlier one, which
# pytest then never collects -- consider renaming one of them.
def test_serialize_repeat_list_tsv():
    """Serialize a Repeat_list to TSV through ``Repeat_list.write``.

    Uses only the first two entries of ``TEST_REPEATS``. Each repeat is
    registered on a Sequence with ``repeat_in_sequence`` before the list is
    written with ``write("tsv", str=True)``, which must return a ``str``.
    """
    built = []
    for msa in TEST_REPEATS[:2]:
        built.append(repeat.Repeat(msa=msa))

    host = sequence.Sequence(TEST_SEQUENCE)
    for tandem_repeat in built:
        host.repeat_in_sequence(tandem_repeat)

    collection = rl.Repeat_list(repeats=built)
    serialized = collection.write("tsv", str=True)
    # Exact type check: a str subclass would not satisfy this assertion.
    assert type(serialized) is str
def test_detect_HHrepID():
    """Run ``run_TRD`` with the HHrepID finder on Q9BRR0 and check its MSA.

    Exactly one repeat is expected for the single input record, and its
    ``msa`` must equal the alignment listed below.
    """
    expected_msa = [
        '------------------IPTCAEAGEQ----',
        'EGRLQRKQKNATGGRRHICHECGKSFAQ----',
        'SSGLSKHRRIHTGEKPYECEECGKAFIG----',
        'SSALVIHQRVHTGEKPYECEECGKAFSH----',
        'SSDLIKHQRTHTGEKPYECDDCGKTFSQ----',
        'SCSLLEHHRIHTGEKPYQCSMCGKAFRR----',
        'SSHLLRHQRIHTGDKNVQEPEQGEAWKSRMES',
        '------QLENVETPMSYKCNECERSFTQ----',
        'NTGLIEHQKIHTGEKPYQCNACGKGFTR----',
        'ISYLVQHQRSHVG-------------------',
    ]
    q9brr0 = sequence.Sequence(TEST_SEQUENCE_Q9BRR0)
    per_record = repeat_detection_run.run_TRD(
        seq_records=[q9brr0],
        lFinders=["HHrepID"],
    )
    # One entry per input record; each entry is keyed by finder name.
    hhrepid_hits = per_record[0]['HHrepID']
    assert len(hhrepid_hits) == 1
    assert hhrepid_hits[0].msa == expected_msa
def test_detect_TRUST():
    """Run ``run_TRD`` with the TRUST finder on Q9BRR0 and check the first MSA.

    Three repeats are expected for the single input record; the ``msa`` of the
    first one must equal the alignment listed below.
    """
    # Warning: TRUST finds these results ONLY with the BLOSUM50 substitution
    # matrix.
    expected_first_msa = [
        'HLREDIAQIP---TCAEAGE---QEGRLQR',
        'KQKNATGGRR--HICHECGKSFAQSSGLSK',
        'HRRIHTGEKP--YECEECGKAFIGSSALVI',
        'HQRVHTGEKP--YECEECGKAFSHSSDLIK',
        'HQRTHTGEKP--YECDDCGKTFSQSCSLLE',
        'HHRIHTGEKP--YQCSMCGKAFRRSSHLLR',
        'HQRIHTGDKN--VQEPEQGEAW--KSRM--',
        'ESQLENVETPmsYKCNECERSFTQNTGLIE',
        'HQKIHTGEKP--YQCNACGKGFTRISYLVQ',
    ]
    q9brr0 = sequence.Sequence(TEST_SEQUENCE_Q9BRR0)
    per_record = repeat_detection_run.run_TRD(
        seq_records=[q9brr0],
        lFinders=["TRUST"],
    )
    # One entry per input record; each entry is keyed by finder name.
    trust_hits = per_record[0]['TRUST']
    assert len(trust_hits) == 3
    assert trust_hits[0].msa == expected_first_msa
def test_detect_TREKS():
    """Run ``run_TRD`` with the T-REKS finder on Q9BRR0 and check its MSA.

    Exactly one repeat is expected for the single input record, and its
    ``msa`` must equal the alignment listed below.
    """
    expected_msa = [
        'C---G---KSFAQSSGLSKHRRIHTGEKPYECE-E',
        'C---G---KAFIGSSALVIHQRVHTGEKPYECE-E',
        'C---G---KAFSHSSDLIKHQRTHTGEKPYECD-D',
        'C---G---KTFSQSCSLLEHHRIHTGEKPYQCS-M',
        'C---G---KAFRRSSHLLRHQRIHTGDKNVQ-EPE',
        'Q---G---EAW--KSRME-SQ-LENVETPMSYK--',
        'C---NECERSFTQNTGLIEHQKIHTGEKPYQ----',
        'CNACG---KGFTRISYLVQHQRSHVG-KNI-LS--',
    ]
    q9brr0 = sequence.Sequence(TEST_SEQUENCE_Q9BRR0)
    per_record = repeat_detection_run.run_TRD(
        seq_records=[q9brr0],
        lFinders=["T-REKS"],
    )
    # One entry per input record; each entry is keyed by finder name.
    treks_hits = per_record[0]['T-REKS']
    assert len(treks_hits) == 1
    assert treks_hits[0].msa == expected_msa
def test_detect_repeats_with_repeat():
    """Detect repeats on one Sequence using HMMs built from known repeats.

    Two seed alignments (double and single) are each turned into an HMM and
    run against the same Sequence object. Each run must return a
    ``Repeat_list`` with exactly one repeat whose ``msa`` equals the matching
    expected result.
    """
    target = sequence.Sequence(TEST_SEQUENCE)
    cases = (
        (TEST_REPEAT_MSA_DOUBLE, TEST_RESULT_REPEAT_MSA_DOUBLE),
        (TEST_REPEAT_MSA_SINGLE, TEST_RESULT_REPEAT_MSA_SINGLE),
    )
    for seed_msa, expected_msa in cases:
        model = HMM.create(format='repeat', repeat=repeat.Repeat(msa=seed_msa))
        # The same Sequence instance is reused for both detections.
        detected = target.detect([model])
        # Exact type check: a subclass would not satisfy this assertion.
        assert type(detected) is repeat_list.Repeat_list
        assert len(detected.repeats) == 1
        assert detected.repeats[0].msa == expected_msa
def test_detect_repeats_with_hmm():
    """Detect repeats on the test sequence with an HMM loaded from a file.

    The HMM is created in ``'hmmer'`` format from ``TEST_FILE_WITH_ID`` inside
    the directory returned by ``path()`` and passed to ``Sequence.detect``.
    """
    hmm_file = os.path.join(path(), TEST_FILE_WITH_ID)
    test_hmm = HMM.create(format='hmmer', file=hmm_file)
    test_seq = sequence.Sequence(TEST_SEQUENCE)
    test_optimized_repeat = test_seq.detect([test_hmm])
    # Previously the result was bound but never inspected, so the test only
    # proved that nothing raised. Check the return type the same way the
    # sibling HMM-detection tests do; no claim is made about how many repeats
    # are found.
    assert type(test_optimized_repeat) is repeat_list.Repeat_list
def test_initialise_sequence():
    """A Sequence built from a raw string exposes that string as ``seq``."""
    built = sequence.Sequence(TEST_SEQUENCE)
    stored = built.seq
    assert stored == TEST_SEQUENCE