    def test_slfit(self):
        sequences = io.load_sequences(self._train_filename)
        hmm = HMM.from_file(self._model_filename)
        learner = SLHMM(self._num_hidden, self._num_observ)
        learner.fit(sequences, verbose=True)
        for sequence in sequences:
            pprint("True probability: %f" % hmm.predict(sequence))
            pprint("Inferred probability: %f" % learner.predict(sequence))
class Experimenter(object):
    '''
    This class is built to facilitate the experiments of different
    learning algorithms.
    '''
    def __init__(self, training_filename, test_filename, model_filename,
                 num_hidden, num_observ, num_em_restarts=20):
        self._training_data = [np.loadtxt(training_filename, dtype=np.int, delimiter=",")]
        # self._test_data = np.loadtxt(test_filename, dtype=np.int, delimiter=",")
        self._test_data = []
        with file(test_filename, "rb") as fin:
            reader = csv.reader(fin)
            for line in reader:
                self._test_data.append(np.asarray(map(int, line)))
        self._model = HMM.from_file(model_filename)
        self._num_hidden = num_hidden
        self._num_observ = num_observ
        self._num_em_restarts = num_em_restarts

    @property
    def training_size(self):
        return self._training_data[0].shape[0]

    @property
    def test_size(self):
        return len(self._test_data)

    @property
    def num_em_restarts(self):
        return self._num_em_restarts

    def _train(self, num_train_inst):
        '''
        Train a Hidden Markov Model with different learning algorithms.
        '''
        num_train_inst = min(num_train_inst, self._training_data[0].shape[0])
        training_data = self._training_data[0][:num_train_inst]
        pprint("=" * 50)
        pprint("Training set length: %d" % num_train_inst)
        # Spectral learning algorithm
        start_time = time.time()
        self._sl_learner = SLHMM(self._num_hidden, self._num_observ)
        self._sl_learner.fit([training_data])
        end_time = time.time()
        pprint("Time used for Spectral Learner: %f" % (end_time - start_time))
        sl_time = end_time - start_time
        # Expectation Maximization algorithm
        # self._em_learners = []
        em_times = np.zeros(self._num_em_restarts, dtype=np.float)
        # for i in xrange(self._num_em_restarts):
        #     self._em_learners.append(EMHMM(self._num_hidden, self._num_observ))
        #     start_time = time.time()
        #     self._em_learners[i].fit([training_data], max_iters=20, verbose=True)
        #     end_time = time.time()
        #     pprint("Time used for Expectation Maximization: %f" % (end_time - start_time))
        #     em_times[i] = end_time - start_time
        return (sl_time, np.mean(em_times))

    def run_experiment(self, num_train_inst):
        '''
        @num_train_inst: int, number of training instances used to fit the learners.
        Returns (sl_time, em_time, sl_variation_measure, em_variation_measure).
        '''
        sl_time, em_time = self._train(num_train_inst)
        true_probs = np.zeros(len(self._test_data), dtype=np.float)
        sl_probs = np.zeros(len(self._test_data), dtype=np.float)
        em_probs = np.zeros((self._num_em_restarts, len(self._test_data)), dtype=np.float)
        for i, seq in enumerate(self._test_data):
            true_probs[i] = self._model.predict(seq)
            sl_probs[i] = self._sl_learner.predict(seq)
            # for j in xrange(self._num_em_restarts):
            #     em_probs[j, i] = self._em_learners[j].predict(seq)
        # L1-distance between the true probability and the probability inferred by spectral learning
        sl_variation_dist = np.abs(true_probs - sl_probs)
        # L1-distance between the true probability and the probability inferred by expectation maximization
        em_variation_dist = np.abs(true_probs - em_probs)
        # Sum of L1-distances
        sl_variation_measure = np.sum(sl_variation_dist)
        em_variation_measure = np.sum(em_variation_dist, axis=1)
        return (sl_time, em_time, sl_variation_measure, em_variation_measure)
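# A minimal driver sketch showing how Experimenter might be invoked. The file
# paths, model sizes, and training-set sizes below are illustrative assumptions,
# not values taken from the original experiments.
if __name__ == "__main__":
    experimenter = Experimenter("train.csv",   # assumed training CSV path
                                "test.csv",    # assumed test CSV path
                                "model.txt",   # assumed HMM model file
                                num_hidden=4,
                                num_observ=8,
                                num_em_restarts=20)
    pprint("Training size: %d, test size: %d" %
           (experimenter.training_size, experimenter.test_size))
    # Sweep over increasing amounts of training data and report the timing and
    # total variation measure returned by run_experiment for spectral learning.
    for num_train_inst in (100, 1000, 10000):
        sl_time, em_time, sl_var, em_var = experimenter.run_experiment(num_train_inst)
        pprint("SL time: %f, SL total L1-distance: %f" % (sl_time, sl_var))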