Пример #1
0
 def test_slfit(self):
     """Fit a spectral learner (SLHMM) on the training sequences, then
     print, for every training sequence, the probability assigned by the
     ground-truth HMM next to the one inferred by the learner."""
     train_seqs = io.load_sequences(self._train_filename)
     true_model = HMM.from_file(self._model_filename)
     spectral = SLHMM(self._num_hidden, self._num_observ)
     spectral.fit(train_seqs, verbose=True)
     for seq in train_seqs:
         # Side-by-side comparison of ground truth vs. learned model.
         pprint("True probability: %f" % true_model.predict(seq))
         pprint("Infered probability: %f" % spectral.predict(seq))
Пример #2
0
 def test_slfit(self):
     """Train a spectral HMM learner and report, per training sequence,
     the true model's probability alongside the learner's estimate."""
     sequences = io.load_sequences(self._train_filename)
     reference = HMM.from_file(self._model_filename)
     learner = SLHMM(self._num_hidden, self._num_observ)
     learner.fit(sequences, verbose=True)
     for observation in sequences:
         true_p = reference.predict(observation)
         learned_p = learner.predict(observation)
         pprint("True probability: %f" % true_p)
         pprint("Infered probability: %f" % learned_p)
Пример #3
0
class Experimenter(object):
    '''
    This class is built to facilitate the experiments of different learning
    algorithms.
    '''
    def __init__(self, training_filename, test_filename, model_filename, num_hidden,
                 num_observ, num_em_restarts=20):
        '''
        @training_filename: string, CSV file with the training observation matrix
        @test_filename:     string, CSV file with one observation sequence per line
        @model_filename:    string, file describing the ground-truth HMM
        @num_hidden:        int, number of hidden states
        @num_observ:        int, size of the observation alphabet
        @num_em_restarts:   int, number of EM random restarts (EM is currently disabled)
        '''
        # Kept as a single-element list because the learners' fit() expects a
        # list of training sequences. (np.int is a removed NumPy alias; use int.)
        self._training_data = [np.loadtxt(training_filename, dtype=int, delimiter=",")]
        # self._test_data = np.loadtxt(test_filename, dtype=np.int, delimiter=",")
        self._test_data = []
        # `file()` is a Python-2-only builtin; `open()` in text mode works for
        # csv.reader on both Python 2 and 3.
        with open(test_filename, "r") as fin:
            reader = csv.reader(fin)
            for line in reader:
                # List comprehension instead of map(): on Python 3, map()
                # returns an iterator and np.asarray(map(...)) would build a
                # 0-d object array instead of an int vector.
                self._test_data.append(np.asarray([int(x) for x in line]))
        self._model = HMM.from_file(model_filename)
        self._num_hidden = num_hidden
        self._num_observ = num_observ
        self._num_em_restarts = num_em_restarts

    @property
    def training_size(self):
        '''Number of rows (training instances) in the training matrix.'''
        return self._training_data[0].shape[0]

    @property
    def test_size(self):
        '''Number of test sequences.'''
        return len(self._test_data)

    @property
    def num_em_restarts(self):
        '''Configured number of EM random restarts.'''
        return self._num_em_restarts

    def _train(self, num_train_inst):
        '''
        Train a Hidden Markov Model with different learning algorithms.

        @num_train_inst: int, number of training instances to use (clamped to
                         the available training-set size)
        @return: (sl_time, mean_em_time) wall-clock training times in seconds;
                 the EM part is currently disabled, so mean_em_time is 0.0.
        '''
        num_train_inst = min(num_train_inst, self._training_data[0].shape[0])
        training_data = self._training_data[0][:num_train_inst]
        pprint("=" * 50)
        pprint("Training set length: %d" % num_train_inst)
        # Spectral learning algorithm
        start_time = time.time()
        self._sl_learner = SLHMM(self._num_hidden, self._num_observ)
        self._sl_learner.fit([training_data])
        end_time = time.time()
        pprint("Time used for Spectral Learner: %f" % (end_time - start_time))
        sl_time = end_time - start_time
        # Expectation Maximization algorithm — deliberately disabled below;
        # em_times stays all-zero so the returned mean is 0.0.
        # (np.float is a removed NumPy alias; use builtin float.)
        #self._em_learners = []
        em_times = np.zeros(self._num_em_restarts, dtype=float)
        #for i in xrange(self._num_em_restarts):
            #self._em_learners.append(EMHMM(self._num_hidden, self._num_observ))
            #start_time = time.time()
            #self._em_learners[i].fit([training_data], max_iters=20, verbose=True)
            #end_time = time.time()
            #pprint("Time used for Expectation Maximization: %f" % (end_time - start_time))
            #em_times[i] = end_time - start_time
        return (sl_time, np.mean(em_times))

    def run_experiment(self, num_train_inst):
        '''
        Train on a prefix of the training data and evaluate on the test set.

        @num_train_inst: int, number of training instances to use
        @return: (sl_time, em_time, sl_variation_measure, em_variation_measure)
                 where the variation measures are summed L1 distances between
                 the true sequence probabilities and each learner's estimates.
                 EM inference is disabled, so em_variation_measure reflects
                 all-zero EM probabilities.
        '''
        sl_time, em_time = self._train(num_train_inst)
        true_probs = np.zeros(len(self._test_data), dtype=float)
        sl_probs = np.zeros(len(self._test_data), dtype=float)
        em_probs = np.zeros((self._num_em_restarts, len(self._test_data)), dtype=float)
        for i, seq in enumerate(self._test_data):
            true_probs[i] = self._model.predict(seq)
            sl_probs[i] = self._sl_learner.predict(seq)
            #for j in xrange(self._num_em_restarts):
                #em_probs[j, i] = self._em_learners[j].predict(seq)
        # L1-distance between true probability and inference probability by spectral learning
        sl_variation_dist = np.abs(true_probs - sl_probs)
        # L1-distance between true probability and inference probability by expectation maximization
        # (broadcasts true_probs against the (restarts, tests) zero matrix)
        em_variation_dist = np.abs(true_probs - em_probs)
        # Sum of L1-distance
        sl_variation_measure = np.sum(sl_variation_dist)
        em_variation_measure = np.sum(em_variation_dist, axis=1)
        return (sl_time, em_time, sl_variation_measure, em_variation_measure)
Пример #4
0
class Experimenter(object):
    '''
    This class is built to facilitate the experiments of different learning
    algorithms.
    '''
    def __init__(self,
                 training_filename,
                 test_filename,
                 model_filename,
                 num_hidden,
                 num_observ,
                 num_em_restarts=20):
        '''
        @training_filename: string, CSV file with the training observation matrix
        @test_filename:     string, CSV file with one observation sequence per line
        @model_filename:    string, file describing the ground-truth HMM
        @num_hidden:        int, number of hidden states
        @num_observ:        int, size of the observation alphabet
        @num_em_restarts:   int, number of EM random restarts (EM is currently disabled)
        '''
        # Single-element list because the learners' fit() expects a list of
        # training sequences. (np.int is a removed NumPy alias; use int.)
        self._training_data = [
            np.loadtxt(training_filename, dtype=int, delimiter=",")
        ]
        # self._test_data = np.loadtxt(test_filename, dtype=np.int, delimiter=",")
        self._test_data = []
        # `file()` is a Python-2-only builtin; `open()` in text mode works for
        # csv.reader on both Python 2 and 3.
        with open(test_filename, "r") as fin:
            reader = csv.reader(fin)
            for line in reader:
                # List comprehension instead of map(): on Python 3, map()
                # returns an iterator and np.asarray(map(...)) would build a
                # 0-d object array instead of an int vector.
                self._test_data.append(np.asarray([int(x) for x in line]))
        self._model = HMM.from_file(model_filename)
        self._num_hidden = num_hidden
        self._num_observ = num_observ
        self._num_em_restarts = num_em_restarts

    @property
    def training_size(self):
        '''Number of rows (training instances) in the training matrix.'''
        return self._training_data[0].shape[0]

    @property
    def test_size(self):
        '''Number of test sequences.'''
        return len(self._test_data)

    @property
    def num_em_restarts(self):
        '''Configured number of EM random restarts.'''
        return self._num_em_restarts

    def _train(self, num_train_inst):
        '''
        Train a Hidden Markov Model with different learning algorithms.

        @num_train_inst: int, number of training instances to use (clamped to
                         the available training-set size)
        @return: (sl_time, mean_em_time) wall-clock training times in seconds;
                 the EM part is currently disabled, so mean_em_time is 0.0.
        '''
        num_train_inst = min(num_train_inst, self._training_data[0].shape[0])
        training_data = self._training_data[0][:num_train_inst]
        pprint("=" * 50)
        pprint("Training set length: %d" % num_train_inst)
        # Spectral learning algorithm
        start_time = time.time()
        self._sl_learner = SLHMM(self._num_hidden, self._num_observ)
        self._sl_learner.fit([training_data])
        end_time = time.time()
        pprint("Time used for Spectral Learner: %f" % (end_time - start_time))
        sl_time = end_time - start_time
        # Expectation Maximization algorithm — deliberately disabled below;
        # em_times stays all-zero so the returned mean is 0.0.
        # (np.float is a removed NumPy alias; use builtin float.)
        #self._em_learners = []
        em_times = np.zeros(self._num_em_restarts, dtype=float)
        #for i in xrange(self._num_em_restarts):
        #self._em_learners.append(EMHMM(self._num_hidden, self._num_observ))
        #start_time = time.time()
        #self._em_learners[i].fit([training_data], max_iters=20, verbose=True)
        #end_time = time.time()
        #pprint("Time used for Expectation Maximization: %f" % (end_time - start_time))
        #em_times[i] = end_time - start_time
        return (sl_time, np.mean(em_times))

    def run_experiment(self, num_train_inst):
        '''
        Train on a prefix of the training data and evaluate on the test set.

        @num_train_inst: int, number of training instances to use
        @return: (sl_time, em_time, sl_variation_measure, em_variation_measure)
                 where the variation measures are summed L1 distances between
                 the true sequence probabilities and each learner's estimates.
                 EM inference is disabled, so em_variation_measure reflects
                 all-zero EM probabilities.
        '''
        sl_time, em_time = self._train(num_train_inst)
        true_probs = np.zeros(len(self._test_data), dtype=float)
        sl_probs = np.zeros(len(self._test_data), dtype=float)
        em_probs = np.zeros((self._num_em_restarts, len(self._test_data)),
                            dtype=float)
        for i, seq in enumerate(self._test_data):
            true_probs[i] = self._model.predict(seq)
            sl_probs[i] = self._sl_learner.predict(seq)
            #for j in xrange(self._num_em_restarts):
            #em_probs[j, i] = self._em_learners[j].predict(seq)
        # L1-distance between true probability and inference probability by spectral learning
        sl_variation_dist = np.abs(true_probs - sl_probs)
        # L1-distance between true probability and inference probability by expectation maximization
        # (broadcasts true_probs against the (restarts, tests) zero matrix)
        em_variation_dist = np.abs(true_probs - em_probs)
        # Sum of L1-distance
        sl_variation_measure = np.sum(sl_variation_dist)
        em_variation_measure = np.sum(em_variation_dist, axis=1)
        return (sl_time, em_time, sl_variation_measure, em_variation_measure)