Пример #1
0
    def __init__(self,
                 base_length=7,
                 init_method=SpecializedHMM.InitMethod.random,
                 training_examples=None,
                 alphabet=get_example_alphabet()):
        '''
        Build the matrices of an HMM with ``base_length + 2`` states and
        pass them to the parent constructor.

        State 0 emits '@' with probability 1 and always moves to state 1.
        The last state emits '$' with probability 1 and always moves back
        to state 0. The rows of the states in between come from
        ``init_transition_matrix_row`` and
        ``init_emission_probablity_matrix_row``.

        Parameters:
            base_length: number of states between the first and last state.
            init_method: a ``SpecializedHMM.InitMethod`` value; stored on
                the instance so the row initialisers can read it.
            training_examples: only used if InitMethod.count_based is used.
                ``None`` (the default) means an empty list.
            alphabet: list of emission symbols; '@' is put in front of it
                and '$' behind it to form the full symbol set.

        Raises:
            ValueError: if ``init_method`` is count_based and no training
                examples were given.
        '''
        # NOTE: the default ``alphabet`` is evaluated once, when this
        # method is defined, so instances relying on it share one object.

        # A None sentinel avoids sharing one mutable default list between
        # all instances created without explicit training examples.
        if training_examples is None:
            training_examples = []

        self.base_length = base_length
        self.init_method = init_method
        self.training_examples = training_examples
        self.alphabet = alphabet
        if (self.init_method == SpecializedHMM.InitMethod.count_based and
                len(self.training_examples) == 0):
            # Strings cannot be raised; use a real exception type.
            raise ValueError("Training examples needs to be provided when init method is count based")

        # Two extra states: one in front of and one behind the base states.
        self.number_of_states = base_length + 2
        last_state_index = self.number_of_states - 1

        # State transition matrix, one row per state.
        # First state: [0, 1, 0, 0, .. ,0]
        first_state = zeros(self.number_of_states)
        first_state[1] = 1
        A = [first_state]
        for i in range(1, last_state_index):
            A.append(self.init_transition_matrix_row(i))
        # The last state can only move back to the first state.
        last_state = zeros(self.number_of_states)
        last_state[0] = 1
        A.append(last_state)

        # Emission matrix: one row per state, one column per symbol in V.
        self.number_of_emissions = len(self.alphabet) + 2
        # The first state emits only '@' (column 0).
        first_emission = zeros(self.number_of_emissions)
        first_emission[0] = 1
        B = [first_emission]
        for i in range(1, last_state_index):
            B.append(self.init_emission_probablity_matrix_row(i))
        # The last state emits only '$' (last column).
        last_emission = zeros(self.number_of_emissions)
        last_emission[self.number_of_emissions - 1] = 1
        B.append(last_emission)

        # Set of emission symbols
        V = ['@'] + self.alphabet + ['$']

        # Initial distribution: always start in state 0.
        pi = zeros(self.number_of_states)
        pi[0] = 1

        super(baseHMM,self).__init__(pi, A, B, V)
Пример #2
0
 def __init__(self,
              word_length=7,
              init_method=SpecializedHMM.InitMethod.random,
              training_examples=None,
              alphabet=get_example_alphabet(),
              from_string_string = None):
     '''
     Build the matrices of an HMM with ``word_length + 2`` states and
     pass them to the parent constructor.

     State 0 emits '@' with probability 1 and always moves to state 1.
     The last state emits '$' with probability 1 and always moves back
     to state 0. The rows of the states in between come from
     ``init_transition_matrix_row`` and
     ``init_emission_probablity_matrix_row``.

     Parameters:
         word_length: number of states between the first and last state.
         init_method: a ``SpecializedHMM.InitMethod`` value; stored on
             the instance so the row initialisers can read it.
         training_examples: only used if InitMethod.count_based is used.
             ``None`` (the default) means an empty list.
         alphabet: list of emission symbols; '@' is put in front of it
             and '$' behind it to form the full symbol set.
         from_string_string: if given, a string that evaluates to a
             ``(pi, A, B, V)`` tuple. The parent constructor is called
             with those values and every other argument is ignored; on
             this path ``word_length``, ``init_method``,
             ``training_examples``, ``alphabet``, ``number_of_states``
             and ``number_of_emissions`` are NOT set on the instance.

     Raises:
         ValueError: if ``init_method`` is count_based and no training
             examples were given.
     '''
     # NOTE: the default ``alphabet`` is evaluated once, when this
     # method is defined, so instances relying on it share one object.
     if from_string_string is not None:
         # Init from string.
         # NOTE(security): eval() executes arbitrary code. Only pass
         # strings that come from a trusted source.
         pi, A, B, V = eval(from_string_string)
         super(WordHMM,self).__init__(pi, A, B, V)
         return

     # A None sentinel avoids sharing one mutable default list between
     # all instances created without explicit training examples.
     if training_examples is None:
         training_examples = []

     self.word_length = word_length
     self.init_method = init_method
     self.training_examples = training_examples
     self.alphabet = alphabet
     if (self.init_method == SpecializedHMM.InitMethod.count_based and
             len(self.training_examples) == 0):
         # Strings cannot be raised; use a real exception type.
         raise ValueError("Training examples needs to be provided when init method is count based")

     # Two extra states: one in front of and one behind the word states.
     self.number_of_states = word_length + 2
     last_state_index = self.number_of_states - 1

     # State transition matrix, one row per state.
     # First state: [0, 1, 0, 0, .. ,0]
     first_state = zeros(self.number_of_states)
     first_state[1] = 1
     A = [first_state]
     for i in range(1, last_state_index):
         A.append(self.init_transition_matrix_row(i))
     # The last state can only move back to the first state.
     last_state = zeros(self.number_of_states)
     last_state[0] = 1
     A.append(last_state)

     # Emission matrix: one row per state, one column per symbol in V.
     self.number_of_emissions = len(self.alphabet) + 2
     # The first state emits only '@' (column 0).
     first_emission = zeros(self.number_of_emissions)
     first_emission[0] = 1
     B = [first_emission]
     for i in range(1, last_state_index):
         B.append(self.init_emission_probablity_matrix_row(i))
     # The last state emits only '$' (last column).
     last_emission = zeros(self.number_of_emissions)
     last_emission[self.number_of_emissions - 1] = 1
     B.append(last_emission)

     # Set of emission symbols
     V = ['@'] + self.alphabet + ['$']

     # Initial distribution: always start in state 0.
     pi = zeros(self.number_of_states)
     pi[0] = 1
     super(WordHMM,self).__init__(pi, A, B, V)
Пример #3
0
 def init_transition_matrix_row(self, row_index):
     '''
     Return the initial transition-matrix row for state ``row_index``.

     With InitMethod.random the row is whatever
     ``zeros_and_random_with_sum1(number_of_states,
     number_of_states - row_index)`` returns.

     With InitMethod.count_based the row starts as ``zeros(row_index)``
     followed by ``list_with_sum_and_equal_elements(
     number_of_states - row_index - 1, 0.2)``, and 0.8 is then inserted
     at index ``row_index + 1`` (the transition to the next state).
     Presumably the helper spreads a total of 0.2 evenly over its
     entries - confirm against its definition.

     Raises:
         ValueError: if ``self.init_method`` is neither of the two
             methods above.
     '''
     if self.init_method == SpecializedHMM.InitMethod.random:
         return zeros_and_random_with_sum1(self.number_of_states, self.number_of_states-row_index)
     if self.init_method == SpecializedHMM.InitMethod.count_based:
         row = (zeros(row_index) +
                list_with_sum_and_equal_elements(self.number_of_states-row_index-1,0.2))
         row.insert(row_index+1,0.8)
         return row
     # Strings cannot be raised; use a real exception type.
     raise ValueError("Init Method Not Supported")
Пример #4
0
 def init_transition_matrix_row(self, row_index):
     '''
     Return the initial transition-matrix row for state ``row_index``.

     With InitMethod.random the row is whatever
     ``zeros_and_random_with_sum1(number_of_states,
     number_of_states - row_index)`` returns.

     With InitMethod.count_based the row starts as ``zeros(row_index)``
     followed by ``list_with_sum_and_equal_elements(
     number_of_states - row_index - 1, 0.2)``, and 0.8 is then inserted
     at index ``row_index + 1`` (the transition to the next state).
     Presumably the helper spreads a total of 0.2 evenly over its
     entries - confirm against its definition.

     Raises:
         ValueError: if ``self.init_method`` is neither of the two
             methods above.
     '''
     if self.init_method == SpecializedHMM.InitMethod.random:
         return zeros_and_random_with_sum1(self.number_of_states, self.number_of_states-row_index)
     if self.init_method == SpecializedHMM.InitMethod.count_based:
         row = (zeros(row_index) +
                list_with_sum_and_equal_elements(self.number_of_states-row_index-1,0.2))
         row.insert(row_index+1,0.8)
         return row
     # Strings cannot be raised; use a real exception type.
     raise ValueError("Init Method Not Supported")
Пример #5
0
 def count_position(position):
     '''
     Count how often each alphabet symbol occurs at index ``position``
     over ``self.training_examples``.

     ``self``, ``alphabet``, ``alphabet_size`` and
     ``nr_of_training_examples`` are not parameters; they are read from
     the enclosing scope.

     The counts do not start at zero but at a pseudocount vector. With
     the switches below it comes from
     ``random_list_with_sum(alphabet_size, nr_of_training_examples*0.1)``
     (presumably random values adding up to that total - confirm against
     the helper). Examples shorter than ``position + 1`` are skipped.

     Returns the count list, indexed in the same order as ``alphabet``.
     '''
     # Hard-wired switches selecting how the counts are seeded.
     use_pseudocount = True
     uniform_pseudocount = False

     if not use_pseudocount:
         counts = zeros(alphabet_size)
     elif uniform_pseudocount:
         # Same share of the pseudocount mass for every symbol.
         counts = zeros(alphabet_size)
         for slot in range(alphabet_size):
             counts[slot] = (nr_of_training_examples*0.1)/alphabet_size
     else:
         counts = random_list_with_sum(alphabet_size,
                                       nr_of_training_examples*0.1)

     # Add one for every example that has a character at this position.
     for example in self.training_examples:
         if position >= len(example):
             continue
         symbol_index = alphabet.index(example[position])
         counts[symbol_index] += 1
     return counts
Пример #6
0
 def count_position(position):
     '''
     Tally the symbols found at index ``position`` of every entry in
     ``self.training_examples``.

     The names ``self``, ``alphabet``, ``alphabet_size`` and
     ``nr_of_training_examples`` are taken from the enclosing scope.

     Tallies are seeded with pseudocounts. As the switches are set, the
     seed is ``random_list_with_sum(alphabet_size,
     nr_of_training_examples*0.1)`` (presumably a random list with that
     sum - confirm against the helper). An example contributes nothing
     when it has no character at ``position``.

     Returns the tally list; entry ``k`` belongs to ``alphabet[k]``.
     '''
     # Fixed switches that pick the seeding scheme.
     use_pseudocount = True
     uniform_pseudocount = False

     if not use_pseudocount:
         tally = zeros(alphabet_size)
     elif uniform_pseudocount:
         # Spread the pseudocount mass evenly over all symbols.
         tally = zeros(alphabet_size)
         for k in range(alphabet_size):
             tally[k] = (nr_of_training_examples*0.1)/alphabet_size
     else:
         tally = random_list_with_sum(alphabet_size,
                                      nr_of_training_examples*0.1)

     # Do the counting.
     for sample in self.training_examples:
         if position >= len(sample):
             continue
         k = alphabet.index(sample[position])
         tally[k] += 1
     return tally