Exemplo n.º 1
0
    def test_fold_rna(self):
        """Compare dg() on RNA oligos with UNAFold's reference free energies."""

        # Free energies reported by UNAFold for a set of RNA oligos. Most of the
        # sequences are available at
        # https://github.com/jaswindersingh2/SPOT-RNA/blob/master/sample_inputs/batch_seq.fasta
        reference_dgs = {
            "ACCCCCUCCUUCCUUGGAUCAAGGGGCUCAA": -9.5,
            "AAGGGGUUGGUCGCCUCGACUAAGCGGCUUGGAAUUCC": -10.1,
            "UUGGAGUACACAACCUGUACACUCUUUC": -4.3,
            "AGGGAAAAUCCC": -3.3,
            "GCUUACGAGCAAGUUAAGCAAC": -4.6,
            "UGGGAGGUCGUCUAACGGUAGGACGGCGGACUCUGGAUCCGCUGGUGGAGGUUCGAGUCCUCCCCUCCCAGCCA": -32.8,
            "GGGCGAUGAGGCCCGCCCAAACUGCCCUGAAAAGGGCUGAUGGCCUCUACUG": -20.7,
            "GGGGGCAUAGCUCAGCUGGGAGAGCGCCUGCUUUGCACGCAGGAGGUCUGCGGUUCGAUCCCGCGCGCUCCCACCA": -31.4,
        }

        for rna, expected in reference_dgs.items():
            predicted = dg(rna, temp=37.0)

            # The allowed gap is 50% (factor 0.5) of the magnitude of the lower
            # of the two energies.
            tolerance = abs(min(predicted, expected) * 0.5)
            self.assertAlmostEqual(predicted, expected, delta=tolerance)
Exemplo n.º 2
0
    def test_fold_dna(self):
        """Compare dg() on DNA oligos with UNAFold's reference free energies."""

        # Free energies reported by UNAFold for a set of DNA oligos.
        reference_dgs = {
            # three branched structure
            "GGGAGGTCGTTACATCTGGGTAACACCGGTACTGATCCGGTGACCTCCC": -10.94,
            # four branched structure
            "GGGAGGTCGCTCCAGCTGGGAGGAGCGTTGGGGGTATATACCCCCAACACCGGTACTGATCCGGTGACCTCCC": -23.4,
            # NOTE(review): this sequence contains U although the test is about
            # DNA -- confirm it is intentional.
            "CGCAGGGAUACCCGCG": -3.8,
            "TAGCTCAGCTGGGAGAGCGCCTGCTTTGCACGCAGGAGGT": -6.85,
            "GGGGGCATAGCTCAGCTGGGAGAGCGCCTGCTTTGCACGCAGGAGGTCTGCGGTTCGATCCCGCGCGCTCCCACCA": -15.50,
            "TGAGACGGAAGGGGATGATTGTCCCCTTCCGTCTCA": -18.10,
            "ACCCCCTCCTTCCTTGGATCAAGGGGCTCAA": -3.65,
        }

        for oligo, expected in reference_dgs.items():
            predicted = dg(oligo, temp=37.0)

            # The allowed gap is 60% of the magnitude of the lower of the two
            # energies.
            tolerance = abs(min(predicted, expected) * 0.6)
            self.assertAlmostEqual(predicted, expected, delta=tolerance)
Exemplo n.º 3
0
    def test_fold_cache(self):
        """Check that dg_cache agrees with dg for the full sequence."""

        oligo = "ATGGATTTAGATAGAT"
        energy_table = dg_cache(oligo)
        full_dg = dg(oligo)

        # The entry at row 0, column len - 1 is compared with the energy that
        # dg() reports for the same sequence; they may differ by at most 1.
        last = len(oligo) - 1
        self.assertAlmostEqual(full_dg, energy_table[0][last], delta=1)
def seqfoldScore(queries, returnSS=False):
    '''
    get the predicted minimum folding energy (and optionally the secondary
    structure) for each of the given sequences
    using seqfold here - identical features are available using nupack, though results are sometimes different
    :param queries: encoded sequences; converted to letter strings with numbers2letters
    :param returnSS: if True, also return the secondary structure string and
        the paired-base list of every sequence
    :return: energies (numpy array, one entry per sequence), or the tuple
        (energies, strings, pairLists) when returnSS is True
    '''
    temperature = 37.0  # celsius
    sequences = numbers2letters(queries)

    energies = np.zeros(len(sequences))
    strings = []
    pairLists = []
    # enumerate keeps the sequence index separate from the base-pair indices
    # unpacked below; sharing one variable for both misplaced the energies of
    # later sequences whenever returnSS was set.
    for seq_idx, sequence in enumerate(sequences):
        if len(sequence) == 1:
            en = np.inf  # single-letter sequences are not folded
        else:
            # get predicted minimum energy of folded structure
            en = dg(sequence, temp=temperature)

        if np.isfinite(en):
            if en > 1500:  # no idea why it does this but sometimes it adds 1600 - we will upgrade this to nupack in the future
                energies[seq_idx] = en - 1600
            else:
                energies[seq_idx] = en
        else:
            energies[seq_idx] = 5  # np.nan # set infinities as being very unlikely

        if returnSS:
            structs = fold(sequence)  # identify structural features

            desc = ["."] * len(sequence)
            pairList = []
            for s in structs:
                pairList.append(s.ij[0])
                # only structures described by a single (i, j) pair are drawn
                # as brackets in the structure string
                if len(s.ij) == 1:
                    left, right = s.ij[0]
                    desc[left] = "("
                    desc[right] = ")"

            strings.append("".join(desc))  # secondary structure string
            # list of paired bases, every index shifted up by one
            pairLists.append(np.asarray(pairList) + 1)

    if returnSS:
        return energies, strings, pairLists
    return energies
Exemplo n.º 5
0
    def test_fold(self):
        """Check which inputs dg() rejects and which it accepts."""

        rejected = (
            # it should throw if a nonsense sequence is provided
            "EASFEASFAST",
            # both U and T, mix of RNA and DNA
            "ATGCATGACGATUU",
        )
        for bad_seq in rejected:
            with self.assertRaises(RuntimeError):
                dg(bad_seq, 37.0)

        # this sequence should not throw
        dg("ATGGATTTAGATAGAT")