Exemplo n.º 1
0
 def _plot_motif(self, data, subseqs):
     """Build the motif object(s) for a collection of subsequences.

     Returns a ``(sequence_motif, structure_motif)`` tuple when the first
     subsequence is a numpy array or when ``data.is_rna`` is set, and a
     single sequence ``Motif`` otherwise.
     """
     # Case 1: the structure input was given as PWMs.
     if isinstance(subseqs[0], np.ndarray):
         coder = data.alpha_coder
         n_struct = len(coder.alph1)
         sequences, struct_pwms = [], []
         for matrix in subseqs:
             # Column of the first non-zero entry in every row (0 if the
             # whole row is close to zero).
             first_nonzero = np.argmax(~np.isclose(matrix, 0), axis=1)
             # Integer division by the structure alphabet size selects the
             # sequence letter the column belongs to.
             sequence = ''.join(
                 coder.alph0[k] for k in first_nonzero // n_struct)
             sequences.append(sequence)
             block = np.zeros((len(sequence), n_struct), dtype=np.float32)
             for row, col in enumerate(first_nonzero):
                 # Round down to the start of the n_struct-wide column group.
                 start = col - col % n_struct
                 block[row] = matrix[row, start:start + n_struct]
             struct_pwms.append(block)
         # Element-wise average of the per-subsequence structure matrices.
         mean_struct = np.sum(struct_pwms, 0) / len(struct_pwms)
         return (Motif(coder.alph0, sequences=sequences),
                 Motif(coder.alph1, pwm=mean_struct))

     # Case 2: the structure input was given as strings.
     if data.is_rna:
         decoded = (data.alpha_coder.decode(entry) for entry in subseqs)
         sequences, structures = zip(*decoded)
         return (Motif(data.alpha_coder.alph0, sequences=sequences),
                 Motif(data.alpha_coder.alph1, sequences=structures))

     # Case 3: no structure input, just sequence.
     return Motif(data.one_hot_encoder.alphabet, sequences=subseqs)
Exemplo n.º 2
0
 def setUp(self):
     """Create one motif from a sequence and one from a matching PWM."""
     # Column (in "ACGT" order) of the single non-zero entry of each row;
     # read as letters the indices spell "GATTACA".
     hot_columns = [2, 0, 3, 3, 0, 1, 0]
     pwm = np.zeros((len(hot_columns), 4), dtype=np.float32)
     pwm[np.arange(len(hot_columns)), hot_columns] = 0.25
     self.ref_pwm = pwm
     self.m = Motif("ACGT", ["GATTACA"])
     self.m2 = Motif("ACGT", pwm=self.ref_pwm)
Exemplo n.º 3
0
 def test_utils_save_as_meme(self):
     """Check that ``utils.save_as_meme`` reproduces the reference MEME file.

     Two motifs are written to a file in the temp directory and the result
     is compared with ``<self.folder>/data/ref.meme``.
     """
     logos = [Motif('ACGT', ['GATTACA']), Motif('ACGT', ['AAAA'])]
     out_path = gettempdir() + "/test.meme"
     utils.save_as_meme(logos, out_path)
     try:
         with open(self.folder + "/data/ref.meme", 'rt') as handle:
             ref = handle.read()
         with open(out_path, 'rt') as handle:
             comp = handle.read()
         # assertEqual shows the differing text on failure, which
         # assertTrue(ref == comp) would hide.
         self.assertEqual(ref, comp)
     finally:
         # Clean up the temporary file even when the comparison fails.
         remove(out_path)
Exemplo n.º 4
0
 def _get_optimized_input(self, model, data, layer_name, node_index, boundary, lr, steps, colors_sequence, colors_structure):
     """Optimize a random input for one node and plot it as motif(s).

     Up to five random starting points drawn uniformly from
     ``[-boundary, +boundary]`` are passed to ``self._optimize_input``;
     the first successful result is kept. If none succeeds a warning is
     printed and the result of the last attempt is used. The input is then
     passed through ``utils.softmax`` along axis 1.

     Returns a one-element list with the sequence plot when ``data.is_rna``
     is false, otherwise ``[sequence_plot, structure_plot]``.
     """
     for _ in range(5):
         start = np.random.uniform(
             -boundary, +boundary,
             (1, self.params["input_shape"][0], self.params["input_shape"][1]))
         input_data, success = self._optimize_input(
             model, layer_name, node_index, start, lr, steps)
         if success:
             break
     else:
         # Reached only when none of the attempts reported success.
         print("Warning: loss did not converge for node {} in layer '{}'".format(node_index, layer_name))

     input_data = np.apply_along_axis(utils.softmax, 1, input_data)

     # Sequence-only data: a single logo over the one-hot alphabet.
     if not data.is_rna:
         sequence_motif = Motif(data.one_hot_encoder.alphabet, pwm=input_data)
         return [sequence_motif.plot(colors_sequence, scale=0.25)]

     coder = data.alpha_coder
     if data.is_rna_pwm:
         # Every sequence letter repeated once per structure letter, and
         # the structure alphabet repeated once per sequence letter.
         annotation_seq = ''.join(
             letter * len(coder.alph1) for letter in coder.alph0)
         annotation_struct = ''.join(coder.alph1 * len(coder.alph0))
     else:
         annotation_seq, annotation_struct = coder.decode(coder.alphabet)

     pwm_struct = self._extract_pwm(input_data, annotation_struct, coder.alph1)
     pwm_seq = self._extract_pwm(input_data, annotation_seq, coder.alph0)
     structure_plot = Motif(coder.alph1, pwm=pwm_struct).plot(colors_structure, scale=0.25)
     sequence_plot = Motif(coder.alph0, pwm=pwm_seq).plot(colors_sequence, scale=0.25)
     return [sequence_plot, structure_plot]
Exemplo n.º 5
0
class Test_Motif(unittest.TestCase):
    """Tests for ``Motif`` built from a sequence list and from a PWM."""

    def setUp(self):
        """Create the same 7-position motif from sequences and from a PWM."""
        self.ref_pwm = np.array(
            [
                [0, 0, 0.25, 0],
                [0.25, 0, 0, 0],
                [0, 0, 0, 0.25],
                [0, 0, 0, 0.25],
                [0.25, 0, 0, 0],
                [0, 0.25, 0, 0],
                [0.25, 0, 0, 0],
            ],
            dtype=np.float32)
        self.m = Motif("ACGT", ["GATTACA"])
        self.m2 = Motif("ACGT", pwm=self.ref_pwm)

    def _assert_valid_pwm(self, motif):
        """Assert that ``motif.pwm`` is a 7x4 matrix of row-wise distributions."""
        self.assertEqual(motif.pwm.shape, (7, 4))
        self.assertTrue((motif.pwm >= 0).all() and (motif.pwm <= 1).all())
        self.assertTrue(np.allclose(np.sum(motif.pwm, axis=1), [1] * 7))

    def _assert_valid_entropies(self, motif):
        """Assert that ``motif.entropies`` has 7 values, each within [0, 2]."""
        self.assertEqual(motif.entropies.shape, (7, ))
        self.assertTrue((motif.entropies >= 0).all()
                        and (motif.entropies <= 2).all())

    def test_motif_init(self):
        """Both construction paths keep the given alphabet."""
        # assertEqual reports both values on failure, unlike assertTrue(a == b).
        self.assertEqual(self.m.alphabet, "ACGT")
        self.assertEqual(self.m2.alphabet, "ACGT")

    def test_motif_valid_pwm(self):
        """Both motifs expose a valid PWM and the two PWMs agree."""
        self._assert_valid_pwm(self.m)
        self._assert_valid_pwm(self.m2)
        self.assertTrue(np.allclose(self.m.pwm, self.m2.pwm))

    def test_motif_valid_entropies(self):
        """Both motifs expose valid entropies and the two sets agree."""
        self._assert_valid_entropies(self.m)
        self._assert_valid_entropies(self.m2)
        self.assertTrue(np.allclose(self.m.entropies, self.m2.entropies))

    def test_motif_plot(self):
        """``plot()`` returns an ``Image.Image`` for both motifs."""
        self.assertIsInstance(self.m.plot(), Image.Image)
        self.assertIsInstance(self.m2.plot(), Image.Image)
Exemplo n.º 6
0
from random import choice
import gzip
from pysster.Motif import Motif


def rand_dna(length, alphabet="ACGT"):
    """Return a random string of ``length`` characters drawn from ``alphabet``.

    Each character is picked independently with ``random.choice``. The
    default alphabet is the four DNA letters, so existing callers are
    unaffected; a different alphabet can be passed when needed. A
    ``length`` of zero or less yields an empty string.
    """
    return "".join(choice(alphabet) for _ in range(length))


# Number of sequences in each half of the positive set.
num = 5000

# Positive set: two halves of 140-character sequences, each with its own
# pair of planted 10-character homopolymer runs at fixed positions.
with gzip.open("artifical_pos.fasta.gz", "wt") as handle:
    # First half: C-run after 20 random characters, G-run 20 characters later.
    seqs = []
    for _ in range(num):
        seqs.append(
            rand_dna(20) + "CCCCCCCCCC" + rand_dna(20) + "GGGGGGGGGG" +
            rand_dna(80))
    for seq in seqs:
        handle.write(">1\n{}\n".format(seq))
    Motif("ACGT", seqs).plot().save("pos_half1.png")

    # Second half: A-run after 80 random characters, T-run 20 characters later.
    seqs = []
    for _ in range(num):
        seqs.append(
            rand_dna(80) + "AAAAAAAAAA" + rand_dna(20) + "TTTTTTTTTT" +
            rand_dna(20))
    for seq in seqs:
        handle.write(">1\n{}\n".format(seq))
    Motif("ACGT", seqs).plot().save("pos_half2.png")

# Negative set: fully random sequences, as many as both positive halves.
with gzip.open("artifical_neg.fasta.gz", "wt") as handle:
    seqs = []
    for _ in range(num * 2):
        seqs.append(rand_dna(140))
    Motif("ACGT", seqs).plot().save("neg.png")
    for seq in seqs:
        handle.write(">1\n{}\n".format(seq))