示例#1
0
def test_repeat_ambiguous():
    """Repeats built from TEST_MSA_O and TEST_MSA_K must behave identically.

    Compares the derived MSAs and the score, divergence and p-value of both
    repeats, then pins two of the statistics to fixed reference values.
    """
    repeat_o = repeat.Repeat(msa=TEST_MSA_O)
    repeat_k = repeat.Repeat(msa=TEST_MSA_K)

    # The standard-amino-acid MSA of the O repeat equals both MSA views of K.
    for k_msa_attribute in ("msaTD", "msaTD_standard_aa"):
        assert repeat_o.msaTD_standard_aa == getattr(repeat_k, k_msa_attribute)

    # Each statistic has to agree between the two repeats.
    for statistic in ("score", "divergence", "pvalue"):
        value_o = getattr(repeat_o, statistic)(TEST_SCORE)
        value_k = getattr(repeat_k, statistic)(TEST_SCORE)
        assert value_o == value_k

    # Reference values (compared exactly, as floats).
    assert repeat_o.divergence(TEST_SCORE) == 2.095947265625
    assert repeat_k.pvalue(TEST_SCORE) == 0.3507
示例#2
0
def test_too_big_hmms():
    """Detection with the long and super-long repeat HMMs finds no repeats.

    For each (repeat MSA, sequence) pair an HMM is built from the MSA and run
    on the sequence; the result must be an empty ``RepeatList``.
    """
    scenarios = (
        (TEST_RESULT_REPEAT_MSA_LONG, TEST_SEQUENCE_A),
        (TEST_RESULT_REPEAT_MSA_SUPER_LONG, TEST_SEQUENCE_SUPER_LONG_A),
    )

    for seed_msa, raw_sequence in scenarios:
        seed_repeat = repeat.Repeat(msa=seed_msa)
        model = HMM.create(input_format='repeat', repeat=seed_repeat)
        target = sequence.Sequence(raw_sequence)

        detected = target.detect([model])

        assert type(detected) == repeat_list.RepeatList
        assert len(detected.repeats) == 0
示例#3
0
文件: hmm_test.py 项目: sbliven/tral
def test_create_HMM_from_Repeat():
    """HMMs created from the double and single repeat MSAs match the fixtures."""
    double_hmm = HMM.create(
        input_format='repeat',
        repeat=repeat.Repeat(msa=TEST_REPEAT_MSA_DOUBLE))

    assert double_hmm.l_effective == 2
    # Only state membership is compared for the double HMM, not state order.
    assert set(double_hmm.states) == set(TEST_HMM_STATES_DOUBLE)
    assert double_hmm.p_0 == TEST_HMM_P0_DOUBLE
    # NOTE: the transition probabilities (p_t) are not checked here.

    single_hmm = HMM.create(
        input_format='repeat',
        repeat=repeat.Repeat(msa=TEST_REPEAT_MSA_SINGLE))

    assert single_hmm.l_effective == 1
    # For the single HMM the states are compared directly, order included.
    assert single_hmm.states == TEST_HMM_STATES_SINGLE
    assert single_hmm.p_0 == TEST_HMM_P0_SINGLE
示例#4
0
def test_sequence_pickle():
    """Round-trip a ``Sequence`` through pickle, without and with a repeat list.

    The sequence is first pickled and reloaded as-is. Repeats are then
    detected with an HMM, attached under ``TEST_SEQUENCE_TAG``, and the
    sequence is pickled and reloaded a second time.

    The pickle file is deleted in a ``finally`` clause, so a failing assertion
    does not leave ``test.pickle`` behind for subsequent test runs.
    """
    test_seq = sequence.Sequence(TEST_SEQUENCE)
    test_pickle = os.path.join(path(), "test.pickle")

    try:
        # Round trip 1: plain sequence.
        test_seq.write(test_pickle, 'pickle')
        test_seq_new = sequence.Sequence.create(test_pickle, 'pickle')

        assert test_seq.seq == test_seq_new.seq

        # Detect repeats and register them on the sequence under the tag.
        test_repeat = repeat.Repeat(msa=TEST_REPEAT_MSA_DOUBLE)
        test_hmm = HMM.create(input_format='repeat', repeat=test_repeat)
        test_optimized_repeat = test_seq.detect([test_hmm])
        test_seq.set_repeatlist(test_optimized_repeat, TEST_SEQUENCE_TAG)

        assert type(test_optimized_repeat) == repeat_list.RepeatList
        assert list(test_seq.d_repeatlist.keys()) == [TEST_SEQUENCE_TAG]
        assert type(
            test_seq.d_repeatlist[TEST_SEQUENCE_TAG]) == repeat_list.RepeatList
        # At least one repeat must have been stored.
        assert test_seq.d_repeatlist[TEST_SEQUENCE_TAG].repeats

        test_retrieved_repeatlist = test_seq.get_repeatlist(TEST_SEQUENCE_TAG)
        assert test_retrieved_repeatlist == test_optimized_repeat

        # Round trip 2: sequence carrying a repeat list.
        test_seq.write(test_pickle, 'pickle')
        test_seq_new = sequence.Sequence.create(test_pickle, 'pickle')

        assert test_seq.d_repeatlist.keys() == test_seq_new.d_repeatlist.keys()
        assert test_seq.d_repeatlist[TEST_SEQUENCE_TAG].repeats[
            0].msa == test_seq_new.d_repeatlist[TEST_SEQUENCE_TAG].repeats[0].msa
    finally:
        # Always clean up, even when an assertion above failed.
        if os.path.exists(test_pickle):
            os.remove(test_pickle)
示例#5
0
def test_viterbi():
    """Check the Viterbi path, the unit split and the realignment on toy data.

    For every case an HMM is built from the original repeat MSA and run on the
    sequence. The resulting path, the repeat units cut from that path, and the
    realigned units are each compared with the expected values.
    """
    # name -> (original TR MSA, sequence, expected Viterbi path,
    #          expected refined TR MSA)
    cases = {
        "Single": (
            ["A", "A", "A"],
            "AAAAAA",
            ["M1"] * 6,
            ["A"] * 6,
        ),
        "Double": (
            ["AA", "AA"],
            "AAAAAA",
            ["M1", "M2"] * 3,
            ["AA"] * 3,
        ),
        "Long": (
            ["ADKL", "ADKL"],
            "GYRADKLADKLADKL",
            ["N"] * 3 + ["M1", "M2", "M3", "M4"] * 3,
            ["ADKL"] * 3,
        ),
    }

    for original_msa, seq, expected_path, expected_msa in cases.values():
        model = HMM.create(input_format="repeat",
                           repeat=repeat.Repeat(msa=original_msa))

        # Most likely state path of the sequence through the HMM.
        found_path = model.viterbi(seq)
        assert type(found_path) == list
        assert found_path == expected_path

        # Cut the sequence into repeat units along that path.
        units = hmm_path_to_non_aligned_tandem_repeat_units(
            seq, found_path, model.l_effective)
        assert units == expected_msa

        # Realigning the units must leave these MSAs unchanged.
        assert repeat_align.realign_repeat(units) == expected_msa
示例#6
0
def test_create_repeat_list_from_repeats():
    """A ``RepeatList`` holds the repeats it was built from, in order."""
    built = rl.RepeatList(
        repeats=[repeat.Repeat(msa=msa) for msa in TEST_REPEATS])

    assert len(built.repeats) == 4
    for expected_msa, stored in zip(TEST_REPEATS, built.repeats):
        assert expected_msa == stored.msa
示例#7
0
def test_detect_repeats_with_repeat():
    """HMM-based detection on TEST_SEQUENCE returns the expected single repeat.

    Runs once with the double-unit seed MSA and once with the single-unit
    seed MSA, against the same ``Sequence`` instance.
    """
    test_seq = sequence.Sequence(TEST_SEQUENCE)
    scenarios = (
        (TEST_REPEAT_MSA_DOUBLE, TEST_RESULT_REPEAT_MSA_DOUBLE),
        (TEST_REPEAT_MSA_SINGLE, TEST_RESULT_REPEAT_MSA_SINGLE),
    )

    for seed_msa, expected_msa in scenarios:
        model = HMM.create(input_format='repeat',
                           repeat=repeat.Repeat(msa=seed_msa))

        detected = test_seq.detect([model])

        assert type(detected) == repeat_list.RepeatList
        assert len(detected.repeats) == 1
        assert detected.repeats[0].msa == expected_msa
示例#8
0
def test_serialize_repeat_list_tsv():
    """``rl_io.serialize_repeat_list_tsv`` returns a string for a RepeatList."""
    test_seq = sequence.Sequence(TEST_SEQUENCE)
    located_repeats = []
    for msa in TEST_REPEATS:
        tandem_repeat = repeat.Repeat(msa=msa)
        located_repeats.append(tandem_repeat)
    # Place every repeat on the sequence before serialising.
    for tandem_repeat in located_repeats:
        test_seq.repeat_in_sequence(tandem_repeat)

    serialized = rl_io.serialize_repeat_list_tsv(
        rl.RepeatList(repeats=located_repeats))

    assert type(serialized) == str
示例#9
0
def test_serialize_repeat_list_tsv():
    """``RepeatList.write("tsv", return_string=True)`` returns a string.

    Only the first two entries of TEST_REPEATS are used.
    """
    test_seq = sequence.Sequence(TEST_SEQUENCE)
    located_repeats = []
    for msa in TEST_REPEATS[:2]:
        tandem_repeat = repeat.Repeat(msa=msa)
        located_repeats.append(tandem_repeat)
    # Place every repeat on the sequence before serialising.
    for tandem_repeat in located_repeats:
        test_seq.repeat_in_sequence(tandem_repeat)

    serialized = rl.RepeatList(repeats=located_repeats).write(
        "tsv", return_string=True)

    assert type(serialized) == str
示例#10
0
def test_filter_pvalue():
    """Filtering on p-value with threshold 0.1 keeps exactly one repeat."""
    test_repeats = [repeat.Repeat(msa=msa) for msa in TEST_REPEATS]

    # The p-values are injected directly from the fixture list instead of
    # being computed by the Repeat constructor.
    for tandem_repeat, pvalue in zip(test_repeats, TEST_SCORE_VALUE_LIST):
        tandem_repeat.d_pvalue = {TEST_SCORE: pvalue}

    unfiltered = rl.RepeatList(repeats=test_repeats)
    filtered = unfiltered.filter("pvalue", TEST_SCORE, 0.1)

    assert len(filtered.repeats) == 1
示例#11
0
def test_repeat_pickle():
    """Round-trip a ``Repeat`` through pickle and compare key attributes.

    The pickle file is deleted in a ``finally`` clause, so a failing assertion
    does not leave ``test.pickle`` behind for subsequent test runs.
    """
    myTR_O = repeat.Repeat(msa=TEST_MSA_O)
    test_pickle = os.path.join(path(), "test.pickle")

    try:
        myTR_O.write(test_pickle, 'pickle')
        myTR_O_new = repeat.Repeat.create(test_pickle, 'pickle')

        assert myTR_O.msa == myTR_O_new.msa
        assert myTR_O.sequence_type == myTR_O_new.sequence_type
        assert myTR_O.text == myTR_O_new.text
    finally:
        # Always clean up, even when an assertion above failed.
        if os.path.exists(test_pickle):
            os.remove(test_pickle)
示例#12
0
文件: hmm_test.py 项目: sbliven/tral
def test_hmm_pickle():
    """Round-trip an ``HMM`` through pickle and compare ``hmmer``/``alphabet``.

    The pickle file is deleted in a ``finally`` clause, so a failing assertion
    does not leave ``test.pickle`` behind for subsequent test runs.
    """
    test_repeat = repeat.Repeat(msa=TEST_REPEAT_MSA_DOUBLE)
    test_hmm = HMM.create(input_format='repeat', repeat=test_repeat)
    test_pickle = os.path.join(path(), "test.pickle")

    try:
        test_hmm.write(test_pickle, 'pickle')
        test_hmm_new = HMM.create(input_format='pickle', file=test_pickle)

        assert test_hmm.hmmer == test_hmm_new.hmmer
        assert test_hmm.alphabet == test_hmm_new.alphabet
    finally:
        # Always clean up, even when an assertion above failed.
        if os.path.exists(test_pickle):
            os.remove(test_pickle)
示例#13
0
def test_repeat_score():
    """Placeholder: print three likelihood results, then fail unconditionally.

    TODO: turn this into a real test by replacing the prints and the
    always-false assertion with checks against expected values.
    """
    model_q, eq_freq, alphabet = repeat_io.loadModel()
    indel_rate_per_site = 0.001

    tandem_repeat = repeat.Repeat(msa=TEST_MSA, begin=TEST_BEGIN)
    tandem_repeat.deleteInsertionColumns()

    gap_loglikelihood = loglikelihood_gaps_starphylogeny_zipfian(
        t=1, tandem_repeat=tandem_repeat)
    print(gap_loglikelihood)

    substitution_optimum = optimisation(
        function=loglikelihood_substitution,
        args=[model_q, eq_freq, alphabet, tandem_repeat])
    print(substitution_optimum)

    combined_optimum = optimisation(
        function=loglikelihood_substitutions_gaps,
        args=[[model_q, eq_freq, alphabet, tandem_repeat],
              [tandem_repeat, indel_rate_per_site]])
    print(combined_optimum)

    # Deliberate failure until real assertions are written (see TODO above).
    assert 1 == 2
示例#14
0
def test_pairwise_overlap():
    """``rl.two_repeats_overlap`` gives the expected verdict per repeat pair."""
    test_repeats = [repeat.Repeat(msa=msa) for msa in TEST_REPEATS]
    for tandem_repeat, begin in zip(test_repeats, TEST_BEGIN_LIST):
        tandem_repeat.begin = begin

    # (overlap type, pair of repeats, expected result)
    expectations = (
        ("common_ancestry", test_repeats[:2], False),
        ("common_ancestry", test_repeats[1:3], False),
        ("common_ancestry", test_repeats[2:], False),
        ("shared_char", test_repeats[:2], False),
        ("shared_char", test_repeats[1:3], True),
    )

    for overlap_type, pair, expected in expectations:
        assert rl.two_repeats_overlap(overlap_type, *pair) == expected
示例#15
0
def test_repeat_list_pickle():
    """Round-trip a ``RepeatList`` through pickle and compare its repeats.

    The pickle file is deleted in a ``finally`` clause, so a failing assertion
    does not leave ``test.pickle`` behind for subsequent test runs.
    """
    test_repeats = [repeat.Repeat(msa=i) for i in TEST_REPEATS]
    test_repeat_list = rl.RepeatList(repeats=test_repeats)
    test_pickle = os.path.join(path(), "test.pickle")

    try:
        test_repeat_list.write('pickle', test_pickle)
        # NOTE(review): the list is reloaded through ``repeat.Repeat.create``
        # rather than a RepeatList loader; presumably this works because the
        # pickle loader returns whatever object was stored -- confirm, and
        # switch to a RepeatList-specific loader if one exists.
        test_repeat_list_new = repeat.Repeat.create(test_pickle, 'pickle')

        assert len(test_repeat_list.repeats) == len(
            test_repeat_list_new.repeats)
        assert test_repeat_list.repeats[0].msa == test_repeat_list_new.repeats[
            0].msa
    finally:
        # Always clean up, even when an assertion above failed.
        if os.path.exists(test_pickle):
            os.remove(test_pickle)
示例#16
0
def test_filter_cluster_based():
    """The "none_overlapping" filter keeps the first three test repeats."""
    test_repeats = [repeat.Repeat(msa=msa) for msa in TEST_REPEATS]

    # Inject p-values and begin coordinates from the fixture lists.
    for tandem_repeat, pvalue in zip(test_repeats, TEST_SCORE_VALUE_LIST):
        tandem_repeat.d_pvalue = {TEST_SCORE: pvalue}
    for tandem_repeat, begin in zip(test_repeats, TEST_BEGIN_LIST):
        tandem_repeat.begin = begin

    test_repeat_list = rl.RepeatList(repeats=test_repeats)

    # NOTE(review): the result of this p-value filter is discarded; the
    # overlap filter below runs on the unfiltered list. Confirm whether the
    # call is needed at all.
    test_repeat_list.filter("pvalue", TEST_SCORE, 0.1)

    overlap_spec = ("common_ancestry", None)
    score_spec = [("pvalue", TEST_SCORE), ("divergence", TEST_SCORE)]
    filtered = test_repeat_list.filter(
        "none_overlapping", overlap_spec, score_spec)

    assert len(filtered.repeats) == 3
    for tandem_repeat in test_repeats[:3]:
        assert tandem_repeat in filtered.repeats
示例#17
0
def test_cluster():
    """Clustering by each overlap type yields the expected index sets."""
    test_repeats = [repeat.Repeat(msa=msa) for msa in TEST_REPEATS]
    for tandem_repeat, begin in zip(test_repeats, TEST_BEGIN_LIST):
        tandem_repeat.begin = begin

    test_repeat_list = rl.RepeatList(repeats=test_repeats)

    # (overlap type, expected clusters as sets of repeat indices)
    expectations = (
        ("common_ancestry", [{0}, {1, 3}, {2}]),
        ("shared_char", [{0}, {1, 2, 3}]),
    )

    for overlap_type, expected_clusters in expectations:
        test_repeat_list.cluster(overlap_type)
        found_clusters = test_repeat_list.d_cluster[overlap_type]

        # Same elements in both collections: every expected cluster is
        # present, and there are no additional ones.
        for cluster in expected_clusters:
            assert cluster in found_clusters
        assert len(test_repeat_list.d_cluster[overlap_type]) == len(
            expected_clusters)
示例#18
0
    def detect(self, lHMM=None, denovo=None, **kwargs):
        """ Detects tandem repeats on ``self.seq`` from 2 possible sources.

        A list of ``Repeat`` instances is created for tandem repeat detections
        on the sequence from one of two possible sources, chosen in this
        order:

        * Sequence profile hidden Markov models ``HMM`` (``lHMM`` non-empty):
          each HMM is run on ``self.seq`` with the Viterbi algorithm, the
          path is cut into repeat units, and the units are realigned. Results
          with fewer than two units are dropped.
        * de novo detection algorithms (``denovo`` truthy), used only when
          ``lHMM`` is ``None``/falsy and not an empty list.

        Args:
            lHMM (list of HMM): A list of ``HMM`` instances.
            denovo (bool): If true, run the de novo detectors.
            **kwargs: Optional parameter dictionaries. ``repeat`` is unpacked
                into every ``Repeat`` instantiation, e.g.
                ``repeat = {"calc_score": True}``. ``detection`` is unpacked
                into the de novo detector run.

        Returns:
            A ``RepeatList`` instance, or ``None`` if ``lHMM`` is an empty
            list.

        Raises:
            Exception: If ``lHMM`` is truthy but not a list, if any element of
                ``lHMM`` is not an ``HMM`` instance, or if neither HMMs nor
                de novo detection were requested.
        """

        # Extra keyword arguments for every Repeat() created below.
        repeat_kwargs = kwargs.get('repeat', {})

        if lHMM:
            if not isinstance(lHMM, list):
                raise Exception('The lHMM value is not a list.')
            if not all(isinstance(iHMM, hmm.HMM) for iHMM in lHMM):
                raise Exception('At least one list element in the lHMM '
                                'value is not a valid instance of the HMM '
                                'class.')

            repeats = []
            for iHMM in lHMM:
                # Detect TRs on self.seq with hmm using the Viterbi algorithm.
                most_likely_path = iHMM.viterbi(self.seq)
                LOG.debug("most_likely_path: %s", most_likely_path)
                if not most_likely_path:
                    continue

                unaligned_msa = hmm_viterbi.hmm_path_to_non_aligned_tandem_repeat_units(
                    self.seq, most_likely_path, iHMM.l_effective)
                # A tandem repeat needs at least two units.
                if len(unaligned_msa) <= 1:
                    continue

                # Align the msa; realignment may again leave too few units.
                aligned_msa = repeat_align.realign_repeat(unaligned_msa)
                if len(aligned_msa) <= 1:
                    continue

                # Create a Repeat() class with the new msa
                repeats.append(repeat.Repeat(aligned_msa, **repeat_kwargs))

            # Set begin coordinate for all repeats
            for i_repeat in repeats:
                self.repeat_in_sequence(i_repeat)

            return repeat_list.RepeatList(repeats)

        if lHMM == []:
            # An explicitly empty HMM list yields no result object at all.
            LOG.debug("lHMM == []")
            return None

        if denovo:
            predicted_repeats = repeat_detection_run.run_detector(
                [self], **kwargs.get('detection', {}))[0]
            LOG.debug("predicted_repeats: %s", predicted_repeats)

            repeats = []
            for jTRD, jlTR in predicted_repeats.items():
                for iTR in jlTR:
                    # Rebuild the repeat so that the ``repeat`` parameters
                    # are applied to it.
                    iTR = repeat.Repeat(iTR.msa,
                                        begin=iTR.begin,
                                        **repeat_kwargs)

                    # Consider only tandem repeats that have a repeat unit
                    # predicted to be at least one character long.
                    if iTR.l_effective <= 0:
                        continue

                    # Remember which detector produced this repeat.
                    iTR.TRD = jTRD

                    # Sanity check repeat and set begin coordinate for
                    # all repeats
                    if not self.repeat_in_sequence(iTR):
                        LOG.debug("The tandem repeat is not part of "
                                  "the sequence. Detector: %s", iTR.TRD)
                        continue

                    repeats.append(iTR)

            return repeat_list.RepeatList(repeats)

        raise Exception("Either require denovo detection, or provide an HMM")