Exemplo n.º 1
0
    def mostLikely(self, normal, island, dnastrand):
        """Return the decoded state path for ``dnastrand`` as a string.

        A two-state model (states "N" and "R", alphabet "AGTC") is built
        from the two parameter lists and the first result returned by
        ``MarkovModel.find_states`` is joined into a string.

        normal, island -- sequences of numbers; every entry is divided by
            100 (presumably percentages -- confirm with the caller).  The
            first four entries of each become that state's emission row
            and the last entry is the probability of switching to the
            other state.
        dnastrand -- the observed sequence handed to ``find_states``.
        """
        normal_probs = [float(pct) / 100 for pct in normal]
        island_probs = [float(pct) / 100 for pct in island]

        # The trailing entry of each list is the "switch state" probability.
        leave_normal = normal_probs[-1]
        leave_island = island_probs[-1]

        # Decoding always starts in the first state ("N").
        start = asarray([1.0, 0.0])
        transition = asarray([
            [1.0 - leave_normal, leave_normal],
            [leave_island, 1.0 - leave_island],
        ])
        # 2x4 matrix: one row per state, one column per alphabet letter.
        emission = asarray([normal_probs[:4], island_probs[:4]])

        model = MarkovModel.MarkovModel(
            "NR", "AGTC", start, transition, emission)

        best_path, _score = MarkovModel.find_states(model, dnastrand)[0]
        return ''.join(best_path)
Exemplo n.º 2
0
    def mostLikely(self, normal, island, dnastrand):
        """Return the decoded state path for ``dnastrand`` as a string.

        A two-state model (states "N" and "R", alphabet "AGTC") is built
        from the two parameter lists and the first result returned by
        ``MarkovModel.find_states`` is joined into a string.

        normal, island -- sequences of numbers; every entry is divided by
            100 (presumably percentages -- confirm with the caller).  The
            first four entries of each become that state's emission row
            and the last entry is the probability of switching to the
            other state.
        dnastrand -- the observed sequence handed to ``find_states``.
        """
        normal_probs = [float(pct) / 100 for pct in normal]
        island_probs = [float(pct) / 100 for pct in island]

        # The trailing entry of each list is the "switch state" probability.
        leave_normal = normal_probs[-1]
        leave_island = island_probs[-1]

        # Decoding always starts in the first state ("N").
        start = asarray([1.0, 0.0])
        transition = asarray([
            [1.0 - leave_normal, leave_normal],
            [leave_island, 1.0 - leave_island],
        ])
        # 2x4 matrix: one row per state, one column per alphabet letter.
        emission = asarray([normal_probs[:4], island_probs[:4]])

        model = MarkovModel.MarkovModel(
            "NR", "AGTC", start, transition, emission)

        best_path, _score = MarkovModel.find_states(model, dnastrand)[0]
        return ''.join(best_path)
Exemplo n.º 3
0
 def test_topcoder5(self):
     """Decode the one-letter sequence "T"; the expected path is N."""
     start = array([1.0, 0.0])
     transition = array([[0.84, 0.16],
                         [0.25, 0.75]])
     emission = array([[0.26, 0.37, 0.08, 0.29],
                       [0.31, 0.13, 0.33, 0.23]])
     model = MarkovModel.MarkovModel(
         "NR", "AGTC", start, transition, emission)

     decoded = MarkovModel.find_states(model, "T")

     # Exactly one (path, score) result is expected.
     self.assertEqual(len(decoded), 1)
     path, _score = decoded[0]
     self.assertEqual(path, ["N"])
Exemplo n.º 4
0
 def test_topcoder1(self):
     """Decode "TGCC"; the expected path is NNNN."""
     start = array([1.0, 0.0])
     transition = array([[0.90, 0.10],
                         [0.20, 0.80]])
     emission = array([[0.30, 0.20, 0.30, 0.20],
                       [0.10, 0.40, 0.10, 0.40]])
     model = MarkovModel.MarkovModel(
         "NR", "AGTC", start, transition, emission)

     decoded = MarkovModel.find_states(model, "TGCC")

     # Exactly one (path, score) result is expected.
     self.assertEqual(len(decoded), 1)
     path, _score = decoded[0]
     self.assertEqual(path, ["N", "N", "N", "N"])
Exemplo n.º 5
0
 def test_topcoder5(self):
     """Decode the one-letter sequence "T"; the expected path is N."""
     model = MarkovModel.MarkovModel(
         "NR",
         "AGTC",
         array([1.0, 0.0]),
         array([[0.84, 0.16], [0.25, 0.75]]),
         array([[0.26, 0.37, 0.08, 0.29], [0.31, 0.13, 0.33, 0.23]]),
     )

     decoded = MarkovModel.find_states(model, "T")

     # Exactly one (path, score) result is expected.
     self.assertEqual(len(decoded), 1)
     path, _score = decoded[0]
     self.assertEqual(path, ["N"])
Exemplo n.º 6
0
 def test_topcoder4(self):
     """Decode "TTAGCAGTGCG"; the expected path is NRRRRRRRRRR."""
     expected_path = ['N', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R']

     model = MarkovModel.MarkovModel(
         "NR",
         "AGTC",
         array([1.0, 0.0]),
         array([[0.55, 0.45], [0.15, 0.85]]),
         array([[0.75, 0.03, 0.01, 0.21], [0.34, 0.11, 0.39, 0.16]]),
     )

     decoded = MarkovModel.find_states(model, "TTAGCAGTGCG")

     # Exactly one (path, score) result is expected.
     self.assertEqual(len(decoded), 1)
     path, _score = decoded[0]
     self.assertEqual(path, expected_path)
Exemplo n.º 7
0
 def test_topcoder3(self):
     """Decode a 25-letter sequence; expected NRRRRRRRRRRRNNNNRRRRRRRRR."""
     expected_path = ['N', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'N', 'N', 'N', 'N', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R']

     model = MarkovModel.MarkovModel(
         "NR",
         "AGTC",
         array([1.0, 0.0]),
         array([[0.75, 0.25], [0.25, 0.75]]),
         array([[0.45, 0.36, 0.06, 0.13], [0.24, 0.18, 0.12, 0.46]]),
     )

     decoded = MarkovModel.find_states(model, "CCGTACTTACCCAGGACCGCAGTCC")

     # Exactly one (path, score) result is expected.
     self.assertEqual(len(decoded), 1)
     path, _score = decoded[0]
     self.assertEqual(path, expected_path)
Exemplo n.º 8
0
 def test_topcoder2(self):
     """Decode "CCTGAGTTAGTCGT"; the expected path is NNNRRRNNRRNRRN."""
     expected_path = ['N', 'N', 'N', 'R', 'R', 'R', 'N', 'N', 'R', 'R', 'N', 'R', 'R', 'N']

     model = MarkovModel.MarkovModel(
         "NR",
         "AGTC",
         array([1.0, 0.0]),
         array([[0.56, 0.44], [0.25, 0.75]]),
         array([[0.04, 0.14, 0.62, 0.20], [0.39, 0.15, 0.04, 0.42]]),
     )

     decoded = MarkovModel.find_states(model, "CCTGAGTTAGTCGT")

     # Exactly one (path, score) result is expected.
     self.assertEqual(len(decoded), 1)
     path, _score = decoded[0]
     self.assertEqual(path, expected_path)
Exemplo n.º 9
0
 def test_topcoder1(self):
     """Decode "TGCC"; the expected path is NNNN."""
     model = MarkovModel.MarkovModel(
         "NR",
         "AGTC",
         array([1.0, 0.0]),
         array([[0.90, 0.10], [0.20, 0.80]]),
         array([[0.30, 0.20, 0.30, 0.20], [0.10, 0.40, 0.10, 0.40]]),
     )

     decoded = MarkovModel.find_states(model, "TGCC")

     # Exactly one (path, score) result is expected.
     self.assertEqual(len(decoded), 1)
     path, _score = decoded[0]
     self.assertEqual(path, ['N', 'N', 'N', 'N'])
Exemplo n.º 10
0
    def _eval_mm(self, motif_num=1, seq=''):
        """Return a list of log scores for ``seq`` under one motif's HMM.

        motif_num -- 1-based motif number; the model used is
            ``self.hmms_list[motif_num - 1]``.
        seq -- the sequence to scan.

        One score is produced per window start position, taken from the
        first result of ``MarkovModel.find_states``.  The natural log of
        each score (plus a tiny epsilon) is returned, followed by zeros so
        the list is as long as ``seq``.

        Raises ValueError if ``seq`` is shorter than the number of states
        in the model.
        """
        model = self.hmms_list[motif_num - 1]
        n_states = len(model.states)
        total = len(seq)

        if total < n_states:
            raise ValueError('Sequence must be at least as long as the motif')

        # NOTE(review): each window slice holds n_states - 1 symbols while
        # the number of windows is computed for width n_states -- confirm
        # this is intentional.
        raw_scores = [
            MarkovModel.find_states(model, seq[start:start + n_states - 1])[0][1]
            for start in range(total - n_states + 1)
        ]

        # The epsilon keeps math.log defined when a score is exactly zero.
        eps = 1e-100
        log_score = [math.log(value + eps) for value in raw_scores]

        # zero padding up to the length of the input sequence
        log_score.extend([0] * (total - len(raw_scores)))
        return log_score
Exemplo n.º 11
0
    def _eval_mm(self, motif_num=1, seq=''):
        """Return a list of log scores for ``seq`` under one motif's HMM.

        motif_num -- 1-based motif number; the model used is
            ``self.hmms_list[motif_num - 1]``.
        seq -- the sequence to scan.

        One score is produced per window start position, taken from the
        first result of ``MarkovModel.find_states``.  The natural log of
        each score (plus a tiny epsilon) is returned, followed by zeros so
        the list is as long as ``seq``.

        Raises ValueError if ``seq`` is shorter than the number of states
        in the model.
        """
        model = self.hmms_list[motif_num - 1]
        n_states = len(model.states)
        total = len(seq)

        if total < n_states:
            raise ValueError('Sequence must be at least as long as the motif')

        # NOTE(review): each window slice holds n_states - 1 symbols while
        # the number of windows is computed for width n_states -- confirm
        # this is intentional.
        raw_scores = [
            MarkovModel.find_states(model, seq[start:start + n_states - 1])[0][1]
            for start in range(total - n_states + 1)
        ]

        # The epsilon keeps math.log defined when a score is exactly zero.
        eps = 1e-100
        log_score = [math.log(value + eps) for value in raw_scores]

        # zero padding up to the length of the input sequence
        log_score.extend([0] * (total - len(raw_scores)))
        return log_score
Exemplo n.º 12
0
    def _get_occurence_indexandscore_mm(self, seq, motif_num):
        """Return the top ``self.k`` motif windows of ``seq`` as tuples.

        seq -- the sequence to scan.
        motif_num -- index into ``self.hmms_list`` (used as-is).

        Every window whose width equals the number of model states is
        scored by summing, letter by letter, the score of the first
        ``MarkovModel.find_states`` result.  Windows scoring strictly
        above ``self.threshold`` are kept as ``(start, end, score)`` where
        ``start`` is the window offset plus one and ``end`` is ``start``
        plus the window width.  The kept windows are ordered with
        ``self._get_key`` in descending order and cut to ``self.k`` items.
        """
        model = self.hmms_list[motif_num]
        width = len(model.states)

        hits = []
        for offset in range(len(seq) - width + 1):
            window_score = sum(
                MarkovModel.find_states(model, seq[offset + step])[0][1]
                for step in range(width)
            )
            if window_score > self.threshold:
                first = offset + 1
                hits.append((first, first + width, window_score))

        hits.sort(key=self._get_key, reverse=True)
        return hits[:self.k]
Exemplo n.º 13
0
    def _get_occurence_indexandscore_mm(self, seq, motif_num):
        """Return the top ``self.k`` motif windows of ``seq`` as tuples.

        seq -- the sequence to scan.
        motif_num -- index into ``self.hmms_list`` (used as-is).

        Every window whose width equals the number of model states is
        scored by summing, letter by letter, the score of the first
        ``MarkovModel.find_states`` result.  Windows scoring strictly
        above ``self.threshold`` are kept as ``(start, end, score)`` where
        ``start`` is the window offset plus one and ``end`` is ``start``
        plus the window width.  The kept windows are ordered with
        ``self._get_key`` in descending order and cut to ``self.k`` items.
        """
        model = self.hmms_list[motif_num]
        width = len(model.states)

        hits = []
        for offset in range(len(seq) - width + 1):
            window_score = sum(
                MarkovModel.find_states(model, seq[offset + step])[0][1]
                for step in range(width)
            )
            if window_score > self.threshold:
                first = offset + 1
                hits.append((first, first + width, window_score))

        hits.sort(key=self._get_key, reverse=True)
        return hits[:self.k]
Exemplo n.º 14
0
 def test_train_visible(self):
     """Train on labelled sequences, then check decoding and parameters."""
     state_names = ["0", "1", "2", "3"]
     letters = ["A", "C", "G", "T"]
     # Each pair is (observed sequence, state label per letter).
     labelled = [
         ("AACCCGGGTTTTTTT", "001112223333333"),
         ("ACCGTTTTTTT", "01123333333"),
         ("ACGGGTTTTTT", "01222333333"),
         ("ACCGTTTTTTTT", "011233333333"),
         ]
     markov_model = MarkovModel.train_visible(state_names, letters, labelled)

     # Decoding a short sequence gives a single (path, score) result.
     decoded = MarkovModel.find_states(markov_model, "AACGTT")
     self.assertEqual(len(decoded), 1)
     path, score = decoded[0]
     self.assertEqual(path, ['0', '0', '1', '2', '3', '3'])
     self.assertAlmostEqual(score, 0.0082128906)

     self.assertEqual(markov_model.states, ['0', '1', '2', '3'])
     self.assertEqual(markov_model.alphabet, ['A', 'C', 'G', 'T'])

     # Initial distribution.
     expected_initial = [1.0, 0.0, 0.0, 0.0]
     self.assertEqual(len(markov_model.p_initial), 4)
     for idx, wanted in enumerate(expected_initial):
         self.assertAlmostEqual(markov_model.p_initial[idx], wanted)

     # Transition matrix: shape first, then every cell.
     expected_transition = [
         [0.2, 0.8, 0.0, 0.0],
         [0.0, 0.5, 0.5, 0.0],
         [0.0, 0.0, 0.5, 0.5],
         [0.0, 0.0, 0.0, 1.0],
     ]
     self.assertEqual(len(markov_model.p_transition), 4)
     for row in range(4):
         self.assertEqual(len(markov_model.p_transition[row]), 4)
     for row, wanted_row in enumerate(expected_transition):
         for col, wanted in enumerate(wanted_row):
             self.assertAlmostEqual(markov_model.p_transition[row][col],
                                    wanted)

     # Emission matrix: shape first, then every cell to four places.
     expected_emission = [
         [0.666667, 0.111111, 0.111111, 0.111111],
         [0.083333, 0.750000, 0.083333, 0.083333],
         [0.083333, 0.083333, 0.750000, 0.083333],
         [0.031250, 0.031250, 0.031250, 0.906250],
     ]
     self.assertEqual(len(markov_model.p_emission), 4)
     for row in range(4):
         self.assertEqual(len(markov_model.p_emission[row]), 4)
     for row, wanted_row in enumerate(expected_emission):
         for col, wanted in enumerate(wanted_row):
             self.assertAlmostEqual(markov_model.p_emission[row][col],
                                    wanted, places=4)
Exemplo n.º 15
0
# NOTE: `states` is used below but is assigned outside this excerpt.
alphabet = ["A", "C", "G", "T"]
# Each training pair is (observed sequence, state path); within a pair the
# two strings have the same length, one state label per observed letter.
training_data = [
    ("AACCCGGGTTTTTTT", "001112223333333"),
    ("ACCGTTTTTTT", "01123333333"),
    ("ACGGGTTTTTT", "01222333333"),
    ("ACCGTTTTTTTT", "011233333333"),
    ]
print "Training HMM"
mm = MarkovModel.train_visible(states, alphabet, training_data)
print "Classifying"

# The result of find_states is a list of (state list, float) tuples.
# It is not printed directly because the float's repr may differ in
# precision between platforms; the loop below formats the float to a
# fixed ten decimal places instead.
states = MarkovModel.find_states(mm, "AACGTT")
for state_list, state_float in states:
    print "State %s, %0.10f" % (repr(state_list), state_float)
# print_mm is a helper defined elsewhere in the script.
print_mm(mm)




# Set-up for the second scenario; `states` and `alphabet` are rebound here.
print "TESTING baum welch"
states = ["CP", "IP"]
alphabet = ["cola", "ice_t", "lem"]
# presumably each number is an index into `alphabet` -- TODO confirm
outputs = [
    (2, 1, 0)
    ]
print "Training HMM"
# NOTE(review): the second entry is tiny but non-zero; reason not visible here.
p_initial = [1.0, 0.0000001]
Exemplo n.º 16
0
# NOTE: `states` is used below but is assigned outside this excerpt.
alphabet = ["A", "C", "G", "T"]
# Each training pair is (observed sequence, state path); within a pair the
# two strings have the same length, one state label per observed letter.
training_data = [
    ("AACCCGGGTTTTTTT", "001112223333333"),
    ("ACCGTTTTTTT", "01123333333"),
    ("ACGGGTTTTTT", "01222333333"),
    ("ACCGTTTTTTTT", "011233333333"),
    ]
print "Training HMM"
mm = MarkovModel.train_visible(states, alphabet, training_data)
print "Classifying"

# The result of find_states is a list of (state list, float) tuples.
# It is not printed directly because the float's repr may differ in
# precision between platforms; the loop below formats the float to a
# fixed ten decimal places instead.
states = MarkovModel.find_states(mm, "AACGTT")
for state_list, state_float in states :
    print "State %s, %0.10f" % (repr(state_list), state_float)
# print_mm is a helper defined elsewhere in the script.
print_mm(mm)




# Set-up for the second scenario; `states` and `alphabet` are rebound here.
print "TESTING baum welch"
states = ["CP", "IP"]
alphabet = ["cola", "ice_t", "lem"]
# presumably each number is an index into `alphabet` -- TODO confirm
outputs = [
    (2, 1, 0)
    ]
print "Training HMM"
# NOTE(review): the second entry is tiny but non-zero; reason not visible here.
p_initial = [1.0, 0.0000001]