def train(self, train_data, test_data):
     """Build ``self.table``, a per-position distribution over symbols.

     The sequences from ``train_data`` and ``test_data`` are concatenated and
     counted together. ``self.table[position][symbol]`` ends up holding, as a
     ``Decimal``, the fraction of sequences reaching ``position`` that carry
     ``symbol`` there. A position with no observations at all gets the
     uniform value ``1 / num_symbols`` in every cell.

     Symbols are used directly as list indices, so each one must be a valid
     index into a list of length ``num_symbols``.

     NOTE(review): the original code carried an "increment the stop symbol
     count" comment with no statement behind it; no stop symbol is counted
     here. Confirm whether that is intended.
     """
     sequences = train_data + test_data

     # Per the original note, this count is meant to exclude the stop symbol.
     num_symbols = count_unique_symbols(sequences)
     max_len = longest_sequence_length(sequences)

     # One row per position, one column per symbol, all counts starting at 0.
     self.table = [[0] * num_symbols for _ in range(max_len)]
     table = self.table

     # Tally every symbol at the position where it occurs.
     for seq in sequences:
         for pos, symbol in enumerate(seq):
             table[pos][symbol] += 1

     # Turn each row of counts into probabilities, in place.
     for row in table:
         total = sum(row)
         if total != 0:
             denom = Decimal(total)
             row[:] = [Decimal(count) / denom for count in row]
         else:
             # Nothing observed at this position: fall back to uniform.
             row[:] = [Decimal(1) / Decimal(num_symbols) for _ in row]
 def train(self, train_data, test_data):
     """Fit ``self.model`` with EM until the log-likelihood gain stalls.

     The sequences from ``train_data`` and ``test_data`` are concatenated and
     used together. ``self.num_symbols`` is set from them, ``self.model`` is
     initialised via ``self.randommodel(self.num_states, self.num_symbols)``
     and then refined by repeated ``self.iterateEM`` calls. After each
     iteration the log-likelihood of the sequences is recomputed; training
     stops once the improvement over the previous iteration is no longer
     greater than ``ll_bound``.

     At least two iterations always run, because the first comparison needs
     two measured log-likelihood values.
     """
     sequences = train_data + test_data
     self.num_symbols = count_unique_symbols(sequences)

     # Minimum log-likelihood improvement required to keep iterating.
     ll_bound = 10.0
     self.model = self.randommodel(self.num_states, self.num_symbols)

     # ``None`` marks "not measured yet". A float sentinel such as -1.0 could
     # collide with a genuine log-likelihood value and force a spurious
     # extra iteration.
     prev = None
     ll = None
     while prev is None or ll - prev > ll_bound:
         prev = ll
         self.model = self.iterateEM(self.model, sequences)
         probs = self.computeprobabilities(self.model, sequences)
         ll = self.loglikelihood(probs)
 def train(self, train_data, test_data):
     """Record the training sequences and initialise bookkeeping attributes.

     Stores ``train_data`` on the instance, derives ``self.num_symbols`` from
     it via ``utilities.count_unique_symbols``, and resets ``self.i`` to 0.
     ``test_data`` is not used by the statements below.
     """
     self.train_data = train_data
     # Symbol count comes from the training data only.
     self.num_symbols = utilities.count_unique_symbols(train_data)
     # NOTE(review): presumably a cursor/counter consumed by other methods of
     # this class -- confirm against its users.
     self.i = 0