Exemplo n.º 1
0
    def _create_mm(self, motif_num, alphabet):
        """Train a Markov model on the instances of one motif.

        The motif is looked up at index ``motif_num - 1`` (so ``motif_num``
        is 1-based) in ``self.original_motives_list`` when that attribute
        exists, otherwise in ``self.motives_list``.  The motif is unpacked
        as a sequence of ``(header, instance)`` pairs; only the instances
        are used for training.

        The number of hidden states equals the median instance length,
        rounded up.  When ``len(instances) * len(states)`` exceeds 500 the
        instances are randomly under-sampled before training.

        Args:
            motif_num: 1-based position of the motif in the motif list.
            alphabet: Alphabet forwarded to ``MarkovModel.train_bw``.

        Raises:
            RuntimeError: If ``MarkovModel.train_bw`` raises
                ``RuntimeError``; the original message is appended to
                "Motif data is too large. ".
        """
        # Upper bound on (number of instances) * (number of states).
        max_training_size = 500

        try:
            # Only EDeN has original_motives_list
            input_motif = self.original_motives_list[motif_num - 1]
        except AttributeError:
            input_motif = self.motives_list[motif_num - 1]

        _headers, instances = [list(column) for column in zip(*input_motif)]

        lengths = [len(instance) for instance in instances]
        median_len = int(math.ceil(np.median(lengths)))

        # Hidden states for Markov Model
        states = [str(i + 1) for i in range(median_len)]

        print("original samples: %d" % len(instances))
        print("states: %d" % len(states))

        # Under-sampling.  Sampling happens only inside this branch: the
        # sample size is undefined when the data is already small enough,
        # so an unconditional second sample would fail with a NameError.
        if len(instances) * len(states) > max_training_size:
            # Floor division keeps the sample size an integer on both
            # Python 2 and 3; never sample down to zero instances.
            samples = max(1, max_training_size // len(states))
            print('sample size = %d' % samples)
            instances = random.sample(instances, samples)

        try:
            # NOTE(review): the trained model is not returned or stored in
            # the visible code -- confirm whether a ``return mm`` is missing.
            mm = MarkovModel.train_bw(states=states,
                                      alphabet=alphabet,
                                      training_data=instances)
        except RuntimeError as msg:
            raise RuntimeError("Motif data is too large. " + str(msg))
Exemplo n.º 2
0
    def _create_mm(self, motif_num, alphabet):
        """Train a Markov model on the instances of one motif.

        The motif is looked up at index ``motif_num - 1`` (so ``motif_num``
        is 1-based) in ``self.original_motives_list`` when that attribute
        exists, otherwise in ``self.motives_list``.  The motif is unpacked
        as a sequence of ``(header, instance)`` pairs; only the instances
        are used for training.

        The number of hidden states equals the median instance length,
        rounded up.  When ``len(instances) * len(states)`` exceeds 500 the
        instances are randomly under-sampled before training.

        Args:
            motif_num: 1-based position of the motif in the motif list.
            alphabet: Alphabet forwarded to ``MarkovModel.train_bw``.

        Raises:
            RuntimeError: If ``MarkovModel.train_bw`` raises
                ``RuntimeError``; the original message is appended to
                "Motif data is too large. ".
        """
        # Upper bound on (number of instances) * (number of states).
        max_training_size = 500

        try:
            # Only EDeN has original_motives_list
            input_motif = self.original_motives_list[motif_num - 1]
        except AttributeError:
            input_motif = self.motives_list[motif_num - 1]

        _headers, instances = [list(column) for column in zip(*input_motif)]

        lengths = [len(instance) for instance in instances]
        median_len = int(math.ceil(np.median(lengths)))

        # Hidden states for Markov Model
        states = [str(i + 1) for i in range(median_len)]

        print("original samples: %d" % len(instances))
        print("states: %d" % len(states))

        # Under-sampling.  Sampling happens only inside this branch: the
        # sample size is undefined when the data is already small enough,
        # so an unconditional second sample would fail with a NameError.
        if len(instances) * len(states) > max_training_size:
            # Floor division keeps the sample size an integer on both
            # Python 2 and 3; never sample down to zero instances.
            samples = max(1, max_training_size // len(states))
            print('sample size = %d' % samples)
            instances = random.sample(instances, samples)

        try:
            # NOTE(review): the trained model is not returned or stored in
            # the visible code -- confirm whether a ``return mm`` is missing.
            mm = MarkovModel.train_bw(states=states,
                                      alphabet=alphabet,
                                      training_data=instances)
        except RuntimeError as msg:
            raise RuntimeError("Motif data is too large. " + str(msg))
Exemplo n.º 3
0
    def test_train_bw(self):
        """Compare Baum-Welch training output with stored reference values.

        A model with four states over the alphabet A/C/G/T is trained on
        four short sequences.  The trained model must echo the states and
        alphabet it was given, and its initial, transition and emission
        probabilities must equal the reference tables after both sides are
        rounded to three decimals.
        """
        # Fixed seed; presumably keeps training reproducible -- confirm
        # which RNG ``train_bw`` actually draws from.
        random.seed(0)

        states = list("0123")
        alphabet = list("ACGT")
        sequences = [
            "AACCCGGGTTTTTTT",
            "ACCGTTTTTTT",
            "ACGGGTTTTTT",
            "ACCGTTTTTTTT",
        ]

        expected_initial = array(
            [0.2275677, 0.29655611, 0.24993822, 0.22593797]
        )
        expected_transition = array([
            [5.16919807e-01, 3.65825814e-33, 4.83080193e-01, 9.23220689e-42],
            [3.65130247e-01, 1.0e-300, 6.34869753e-01, 1.0e-300],
            [8.68776164e-01, 1.02254304e-34, 1.31223836e-01, 6.21835051e-47],
            [3.33333333e-301, 3.33333333e-01, 3.33333333e-301, 6.66666667e-01],
        ])
        expected_emission = array([
            [2.02593570e-301, 2.02593570e-301, 2.02593570e-301, 1.0],
            [1.0e-300, 1.0e-300, 1.0, 1.09629016e-259],
            [3.26369779e-301, 3.26369779e-301, 3.26369779e-301, 1.0],
            [3.33333333e-01, 6.66666667e-01, 3.33333333e-301, 3.33333333e-301],
        ])

        model = MarkovModel.train_bw(states, alphabet, sequences)

        self.assertEqual("".join(model.states), "".join(states))
        self.assertEqual("".join(model.alphabet), "".join(alphabet))

        # Same comparison for each probability table, in the original order.
        reference_tables = (
            ("p_initial", expected_initial),
            ("p_transition", expected_transition),
            ("p_emission", expected_emission),
        )
        for attribute, reference in reference_tables:
            trained = around(getattr(model, attribute), decimals=3)
            wanted = around(reference, decimals=3)
            self.assertTrue(array_equal(trained, wanted))
Exemplo n.º 4
0
    def test_train_bw(self):
        """Train with Baum-Welch and check the result against references.

        Four sequences over A/C/G/T train a four-state model.  The test
        passes when the model reports the same states and alphabet it was
        given and when each probability table matches its reference table
        to three decimal places.
        """
        def rounded(values):
            # Both sides of every comparison are rounded identically.
            return around(values, decimals=3)

        # Fixed seed; presumably for reproducibility -- confirm which RNG
        # ``train_bw`` relies on.
        random.seed(0)

        states = ["0", "1", "2", "3"]
        alphabet = ["A", "C", "G", "T"]
        training_data = [
            "AACCCGGGTTTTTTT",
            "ACCGTTTTTTT",
            "ACGGGTTTTTT",
            "ACCGTTTTTTTT",
        ]

        reference_initial = array(
            [0.2275677, 0.29655611, 0.24993822, 0.22593797])
        reference_transition = array([
            [5.16919807e-01, 3.65825814e-33, 4.83080193e-01, 9.23220689e-42],
            [3.65130247e-01, 1.0e-300, 6.34869753e-01, 1.0e-300],
            [8.68776164e-01, 1.02254304e-34, 1.31223836e-01, 6.21835051e-47],
            [3.33333333e-301, 3.33333333e-01, 3.33333333e-301, 6.66666667e-01],
        ])
        reference_emission = array([
            [2.02593570e-301, 2.02593570e-301, 2.02593570e-301, 1.0],
            [1.0e-300, 1.0e-300, 1.0, 1.09629016e-259],
            [3.26369779e-301, 3.26369779e-301, 3.26369779e-301, 1.0],
            [3.33333333e-01, 6.66666667e-01, 3.33333333e-301, 3.33333333e-301],
        ])

        trained = MarkovModel.train_bw(states, alphabet, training_data)

        got_states = ''.join(trained.states)
        self.assertEqual(got_states, ''.join(states))
        got_alphabet = ''.join(trained.alphabet)
        self.assertEqual(got_alphabet, ''.join(alphabet))

        initial_matches = array_equal(
            rounded(trained.p_initial), rounded(reference_initial))
        self.assertTrue(initial_matches)

        transition_matches = array_equal(
            rounded(trained.p_transition), rounded(reference_transition))
        self.assertTrue(transition_matches)

        emission_matches = array_equal(
            rounded(trained.p_emission), rounded(reference_emission))
        self.assertTrue(emission_matches)