示例#1
0
def test_too_big_hmms():
    """Check that HMM detection finds no repeats for the long test MSAs.

    Each scenario builds a repeat from an MSA constant, turns it into an HMM,
    and runs detection on the paired sequence constant. In both scenarios the
    result must be a ``Repeat_list`` holding zero repeats.
    """
    scenarios = (
        (TEST_RESULT_REPEAT_MSA_LONG, TEST_SEQUENCE_A),
        (TEST_RESULT_REPEAT_MSA_SUPER_LONG, TEST_SEQUENCE_SUPER_LONG_A),
    )

    for msa, raw_sequence in scenarios:
        test_repeat = repeat.Repeat(msa=msa)
        test_hmm = HMM.create(format='repeat', repeat=test_repeat)
        test_seq = sequence.Sequence(raw_sequence)
        test_optimized_repeat = test_seq.detect([test_hmm])

        # isinstance is the idiomatic type check (and accepts subclasses).
        assert isinstance(test_optimized_repeat, repeat_list.Repeat_list)
        assert len(test_optimized_repeat.repeats) == 0
示例#2
0
def test_sequence_pickle():
    """Round-trip a sequence through a pickle file, without and with repeats.

    First a plain sequence is written and re-read, and the raw ``seq``
    attribute is compared. Then a repeat list detected via an HMM is attached
    under ``TEST_SEQUENCE_TAG``, the sequence is pickled again, and the
    restored object must expose the same tags and the same MSA for the first
    repeat.

    The pickle file is always removed afterwards, including when an assertion
    fails, so a broken run does not leave ``test.pickle`` behind.
    """
    test_seq = sequence.Sequence(TEST_SEQUENCE)
    test_pickle = os.path.join(path(), "test.pickle")

    try:
        test_seq.write(test_pickle, 'pickle')
        test_seq_new = sequence.Sequence.create(test_pickle, 'pickle')

        assert test_seq.seq == test_seq_new.seq

        test_repeat = repeat.Repeat(msa=TEST_REPEAT_MSA_DOUBLE)
        test_hmm = HMM.create(format='repeat', repeat=test_repeat)
        test_optimized_repeat = test_seq.detect([test_hmm])
        test_seq.set_repeat_list(test_optimized_repeat, TEST_SEQUENCE_TAG)

        assert isinstance(test_optimized_repeat, repeat_list.Repeat_list)
        assert list(test_seq.dRepeat_list.keys()) == [TEST_SEQUENCE_TAG]
        assert isinstance(test_seq.dRepeat_list[TEST_SEQUENCE_TAG],
                          repeat_list.Repeat_list)
        # At least one repeat must exist for the MSA comparison below.
        assert test_seq.dRepeat_list[TEST_SEQUENCE_TAG].repeats

        test_seq.write(test_pickle, 'pickle')
        test_seq_new = sequence.Sequence.create(test_pickle, 'pickle')

        assert test_seq.dRepeat_list.keys() == test_seq_new.dRepeat_list.keys()
        original_msa = test_seq.dRepeat_list[TEST_SEQUENCE_TAG].repeats[0].msa
        restored_msa = test_seq_new.dRepeat_list[
            TEST_SEQUENCE_TAG].repeats[0].msa
        assert original_msa == restored_msa
    finally:
        # The file may not exist if the very first write failed.
        if os.path.exists(test_pickle):
            os.remove(test_pickle)
示例#3
0
def test_detect_repeats_denovo():
    """Run de novo detection with only the TRUST finder enabled.

    The result must be a ``Repeat_list`` containing exactly three repeats.
    """
    test_parameters = {"detection": {"lFinders": ["TRUST"]}}

    test_seq = sequence.Sequence(TEST_SEQUENCE_Q9BRR0)
    test_optimized_repeat = test_seq.detect(denovo=True, **test_parameters)

    # isinstance is the idiomatic type check (and accepts subclasses).
    assert isinstance(test_optimized_repeat, repeat_list.Repeat_list)
    assert len(test_optimized_repeat.repeats) == 3
示例#4
0
def test_detect_XSTREAM():
    """Run the XSTREAM finder on the Q9BRR0 test sequence.

    Exactly one repeat is expected, and its MSA must match the alignment
    listed below.
    """
    finder = "XSTREAM"
    expected_msa = [
        'ECGKSFAQS-SGLSK-HRRIHTGEKPYECE',
        'ECGKAFIGS-SALVI-HQRVHTGEKPYECE',
        'ECGKAFSHS-SDL-IKHQRTHTGEKPYECD',
        'DCGKTFSQSCSLLEH-H-RIHTGEKPY',
    ]

    query = sequence.Sequence(TEST_SEQUENCE_Q9BRR0)
    results = repeat_detection_run.run_TRD(seq_records=[query],
                                           lFinders=[finder])
    # One entry per input sequence; each entry is keyed by finder name.
    found = results[0][finder]

    assert len(found) == 1
    assert found[0].msa == expected_msa
示例#5
0
def test_serialize_repeat_list_tsv():
    """Serialize a repeat list to TSV via ``rl_io`` and check it is a string.

    Every repeat built from ``TEST_REPEATS`` is first passed to
    ``repeat_in_sequence`` on the test sequence, then all of them are wrapped
    in a ``Repeat_list`` and serialized.
    """
    test_repeats = [repeat.Repeat(msa=msa) for msa in TEST_REPEATS]
    test_seq = sequence.Sequence(TEST_SEQUENCE)
    for test_repeat in test_repeats:
        test_seq.repeat_in_sequence(test_repeat)
    test_repeat_list = rl.Repeat_list(repeats=test_repeats)

    tsv = rl_io.serialize_repeat_list_tsv(test_repeat_list)

    # isinstance is the idiomatic type check (and accepts str subclasses).
    assert isinstance(tsv, str)
示例#6
0
def test_serialize_repeat_list_tsv():
    """Write a repeat list as TSV to a string and check the result type.

    Only the first two entries of ``TEST_REPEATS`` are used. Each repeat is
    passed to ``repeat_in_sequence`` on the test sequence before the list is
    written with ``write("tsv", str=True)``.
    """
    test_repeats = [repeat.Repeat(msa=msa) for msa in TEST_REPEATS[:2]]
    test_seq = sequence.Sequence(TEST_SEQUENCE)
    for test_repeat in test_repeats:
        test_seq.repeat_in_sequence(test_repeat)
    test_repeat_list = rl.Repeat_list(repeats=test_repeats)

    tsv = test_repeat_list.write("tsv", str=True)

    # isinstance is the idiomatic type check (and accepts str subclasses).
    assert isinstance(tsv, str)
示例#7
0
def test_detect_HHrepID():
    """Run the HHrepID finder on the Q9BRR0 test sequence.

    Exactly one repeat is expected, and its MSA must match the alignment
    listed below.
    """
    finder = "HHrepID"
    expected_msa = [
        '------------------IPTCAEAGEQ----',
        'EGRLQRKQKNATGGRRHICHECGKSFAQ----',
        'SSGLSKHRRIHTGEKPYECEECGKAFIG----',
        'SSALVIHQRVHTGEKPYECEECGKAFSH----',
        'SSDLIKHQRTHTGEKPYECDDCGKTFSQ----',
        'SCSLLEHHRIHTGEKPYQCSMCGKAFRR----',
        'SSHLLRHQRIHTGDKNVQEPEQGEAWKSRMES',
        '------QLENVETPMSYKCNECERSFTQ----',
        'NTGLIEHQKIHTGEKPYQCNACGKGFTR----',
        'ISYLVQHQRSHVG-------------------',
    ]

    query = sequence.Sequence(TEST_SEQUENCE_Q9BRR0)
    results = repeat_detection_run.run_TRD(seq_records=[query],
                                           lFinders=[finder])
    # One entry per input sequence; each entry is keyed by finder name.
    found = results[0][finder]

    assert len(found) == 1
    assert found[0].msa == expected_msa
示例#8
0
def test_detect_TRUST():
    """Run the TRUST finder on the Q9BRR0 test sequence.

    Three repeats are expected; the MSA of the first one must match the
    alignment listed below.
    """
    # Warning: TRUST finds these results ONLY with the BLOSUM50 substitution
    # matrix.
    finder = "TRUST"
    expected_first_msa = [
        'HLREDIAQIP---TCAEAGE---QEGRLQR',
        'KQKNATGGRR--HICHECGKSFAQSSGLSK',
        'HRRIHTGEKP--YECEECGKAFIGSSALVI',
        'HQRVHTGEKP--YECEECGKAFSHSSDLIK',
        'HQRTHTGEKP--YECDDCGKTFSQSCSLLE',
        'HHRIHTGEKP--YQCSMCGKAFRRSSHLLR',
        'HQRIHTGDKN--VQEPEQGEAW--KSRM--',
        'ESQLENVETPmsYKCNECERSFTQNTGLIE',
        'HQKIHTGEKP--YQCNACGKGFTRISYLVQ',
    ]

    query = sequence.Sequence(TEST_SEQUENCE_Q9BRR0)
    results = repeat_detection_run.run_TRD(seq_records=[query],
                                           lFinders=[finder])
    # One entry per input sequence; each entry is keyed by finder name.
    found = results[0][finder]

    assert len(found) == 3
    assert found[0].msa == expected_first_msa
示例#9
0
def test_detect_TREKS():
    """Run the T-REKS finder on the Q9BRR0 test sequence.

    Exactly one repeat is expected, and its MSA must match the alignment
    listed below.
    """
    finder = "T-REKS"
    expected_msa = [
        'C---G---KSFAQSSGLSKHRRIHTGEKPYECE-E',
        'C---G---KAFIGSSALVIHQRVHTGEKPYECE-E',
        'C---G---KAFSHSSDLIKHQRTHTGEKPYECD-D',
        'C---G---KTFSQSCSLLEHHRIHTGEKPYQCS-M',
        'C---G---KAFRRSSHLLRHQRIHTGDKNVQ-EPE',
        'Q---G---EAW--KSRME-SQ-LENVETPMSYK--',
        'C---NECERSFTQNTGLIEHQKIHTGEKPYQ----',
        'CNACG---KGFTRISYLVQHQRSHVG-KNI-LS--',
    ]

    query = sequence.Sequence(TEST_SEQUENCE_Q9BRR0)
    results = repeat_detection_run.run_TRD(seq_records=[query],
                                           lFinders=[finder])
    # One entry per input sequence; each entry is keyed by finder name.
    found = results[0][finder]

    assert len(found) == 1
    assert found[0].msa == expected_msa
示例#10
0
def test_detect_repeats_with_repeat():
    """Detect repeats on the test sequence using HMMs built from repeats.

    Each scenario pairs an input MSA with the MSA expected after detection.
    For every pair, detection must return a ``Repeat_list`` with exactly one
    repeat whose MSA equals the expected one. The same ``Sequence`` object is
    reused across scenarios.
    """
    scenarios = (
        (TEST_REPEAT_MSA_DOUBLE, TEST_RESULT_REPEAT_MSA_DOUBLE),
        (TEST_REPEAT_MSA_SINGLE, TEST_RESULT_REPEAT_MSA_SINGLE),
    )
    test_seq = sequence.Sequence(TEST_SEQUENCE)

    for input_msa, expected_msa in scenarios:
        test_repeat = repeat.Repeat(msa=input_msa)
        test_hmm = HMM.create(format='repeat', repeat=test_repeat)
        test_optimized_repeat = test_seq.detect([test_hmm])

        # isinstance is the idiomatic type check (and accepts subclasses).
        assert isinstance(test_optimized_repeat, repeat_list.Repeat_list)
        assert len(test_optimized_repeat.repeats) == 1
        assert test_optimized_repeat.repeats[0].msa == expected_msa
示例#11
0
def test_detect_repeats_with_hmm():
    """Run detection with an HMM loaded from a HMMER file.

    This only exercises the call path: the detection result is not
    inspected, so the test passes as long as nothing raises.
    """
    hmm_path = os.path.join(path(), TEST_FILE_WITH_ID)
    hmm_model = HMM.create(format='hmmer', file=hmm_path)
    query = sequence.Sequence(TEST_SEQUENCE)
    detected = query.detect([hmm_model])
示例#12
0
def test_initialise_sequence():
    """Check that a new ``Sequence`` exposes its input string as ``seq``."""
    created = sequence.Sequence(TEST_SEQUENCE)
    stored = created.seq
    assert stored == TEST_SEQUENCE