Example #1
    def __init__(self,
                 train_valid_test_ratio=[8, 1, 1],
                 preprocessor=None,
                 noise=None,
                 batch_size=100,
                 num_batches=None,
                 iter_class='SequentialSubsetIterator',
                 rng=None,
                 log=None):
        '''
        DESCRIPTION: Abstract base class for datasets.
        PARAMS:
            train_valid_test_ratio (list of 3 numbers): proportions for splitting
                the dataset into train, valid and test sets
            preprocessor: preprocessor applied to the data, or None
            noise: noise object applied to the data, or None
            batch_size (int): number of examples per batch
            num_batches (int): number of batches per epoch, or None to use all
            iter_class (str): name of the subset iterator class, e.g.
                'SequentialSubsetIterator'
            rng: random number generator or seed, or None
            log: Log object; a default Log is used if None
        '''

        assert len(train_valid_test_ratio) == 3, \
                'train_valid_test_ratio must be a list of length 3'
        self.ratio = train_valid_test_ratio
        self.preprocessor = preprocessor
        self.noise = noise
        self.iter_class = iter_class
        self.batch_size = batch_size
        self.num_batches = num_batches
        self.rng = rng

        self.log = log

        if self.log is None:
            # use default Log setting
            self.log = Log(logger=internal_logger)
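
A hedged usage sketch for reference: assuming a concrete subclass such as Mnist (seen in Example #7) forwards its keyword arguments to the constructor above, an instance would be built like this.

# Sketch only: Mnist is assumed to subclass this abstract dataset class and
# to pass these keyword arguments straight through to the __init__ above.
data = Mnist(train_valid_test_ratio=[8, 1, 1],
             preprocessor=None,
             batch_size=100,
             iter_class='SequentialSubsetIterator')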
Example #2
    def build_log(self, save_to_database=None, id=None):
        if id is not None:
            experiment_name = '%s_%s' % (self.state.log.experiment_name, id)
        else:
            experiment_name = self.state.log.experiment_name

        log = Log(experiment_name=experiment_name,
                  description=self.state.log.description,
                  save_outputs=self.state.log.save_outputs,
                  save_learning_rule=self.state.log.save_learning_rule,
                  save_model=self.state.log.save_model,
                  save_epoch_error=self.state.log.save_epoch_error,
                  save_to_database=save_to_database)
        return log
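
The only non-trivial part of build_log is the experiment-name selection. A minimal plain-Python sketch of that rule (the helper name _experiment_name is made up for illustration):

def _experiment_name(base, id=None):
    # same rule as build_log: append the id when one is given,
    # otherwise reuse the base experiment name
    return '%s_%s' % (base, id) if id is not None else base

assert _experiment_name('mnist') == 'mnist'
assert _experiment_name('mnist', 3) == 'mnist_3'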
Example #3
    def __init__(self,
                 model,
                 dataset,
                 learning_rule,
                 learning_method,
                 log=None):
        self.model = model
        self.dataset = dataset
        self.learning_rule = learning_rule
        self.learning_method = learning_method
        self.log = log

        if self.log is None:
            # use default Log setting
            self.log = Log(logger=internal_logger)

        elif self.log.save_to_database:
            self.log.print_records()
            print('\n')

        self.log.info('..begin setting up train object')
        self.setup()
Example #4
    def __init__(self, model, dataset, learning_rule, learning_method, log=None):
        self.model = model
        self.dataset = dataset
        self.learning_rule = learning_rule
        self.learning_method = learning_method
        self.log = log

        if self.log is None:
            # use default Log setting
            self.log = Log(logger=internal_logger)

        elif self.log.save_to_database:
            self.log.print_records()
            print('\n')

        self.log.info('..begin setting up train object')
        self.setup()
Example #5
class TrainObject():
    '''
    UPDATES:
        (Normal momentum)
        delta := momentum * delta - learning_rate * (d cost(param) / d param)
        param := param + delta

        (Nesterov momentum)
        delta := momentum * delta - learning_rate * (d cost(param + momentum*delta) / d param)
        param := param + delta
    '''
    def __init__(self,
                 model,
                 dataset,
                 learning_rule,
                 learning_method,
                 log=None):
        self.model = model
        self.dataset = dataset
        self.learning_rule = learning_rule
        self.learning_method = learning_method
        self.log = log

        if self.log is None:
            # use default Log setting
            self.log = Log(logger=internal_logger)

        elif self.log.save_to_database:
            self.log.print_records()
            print('\n')

        self.log.info('..begin setting up train object')
        self.setup()

    def setup(self):

        #================[ check output dim with target size ]================#

        assert self.model.input_dim == self.dataset.feature_size(), \
                'input dim: ' + str(self.model.input_dim) + \
                ', is not equal to feature size: ' + str(self.dataset.feature_size())

        assert self.model.layers[-1].dim == self.dataset.target_size(), \
                'output dim: ' + str(self.model.layers[-1].dim) + \
                ', is not equal to target size: ' + str(self.dataset.target_size())

        #===================[ build params and deltas list ]==================#

        def is_shared_var(var):
            return var.__class__.__name__ == 'TensorSharedVariable' or \
                    var.__class__.__name__ == 'CudaNdarraySharedVariable'

        params = []
        deltas = []

        prev_layer_dim = self.model.input_dim
        for layer in self.model.layers:

            # append layer params that will be updated during training
            if len(layer.params) > 0:
                for param in layer.params:
                    params += [param]
                    deltas += [
                        theano.shared(
                            np.zeros(param.shape.eval(), dtype=floatX))
                    ]

            if is_shared_var(layer.W):
                assert layer.W.dtype == floatX
                params += [layer.W]
                deltas += [
                    theano.shared(
                        np.zeros((prev_layer_dim, layer.dim), dtype=floatX))
                ]

            else:
                self.log.info(layer.W.name + ' is ' +
                              layer.W.__class__.__name__ +
                              ' but not SharedVariable.')

            if is_shared_var(layer.b):
                assert layer.b.dtype == floatX
                params += [layer.b]
                deltas += [theano.shared(np.zeros(layer.dim, dtype=floatX))]

            else:
                self.log.info(layer.b.name + ' is ' +
                              layer.b.__class__.__name__ +
                              ' but not SharedVariable.')

            prev_layer_dim = layer.dim

        #=====================[ training params updates ]=====================#

        self.log.info("..update params: " + str(params))

        train_x = T.matrix('train_x', dtype=floatX)
        train_y = T.matrix('train_y', dtype=floatX)

        train_y_pred, train_layers_stats = self.model.train_fprop(train_x)
        train_cost = self.learning_rule.cost.get_cost(train_y, train_y_pred)

        if self.learning_rule.L1_lambda:
            self.log.info('..applying L1_lambda: %f' %
                          self.learning_rule.L1_lambda)
            L1 = theano.shared(0.)
            for layer in self.model.layers:
                if is_shared_var(layer.W):
                    L1 += T.sqrt((layer.W**2).sum(axis=0)).sum()

                else:
                    self.log.info(layer.W.name + ' is ' +
                                  layer.W.__class__.__name__ +
                                  ' and is not used in L1 regularization')
            train_cost += self.learning_rule.L1_lambda * L1

        if self.learning_rule.L2_lambda:
            self.log.info('..applying L2_lambda: %f' %
                          self.learning_rule.L2_lambda)
            L2 = theano.shared(0.)
            for layer in self.model.layers:
                if is_shared_var(layer.W):
                    L2 += ((layer.W**2).sum(axis=0)).sum()

                else:
                    self.log.info(layer.W.name + ' is ' +
                                  layer.W.__class__.__name__ +
                                  ' and is not used in L2 regularization')
            train_cost += self.learning_rule.L2_lambda * L2

        train_updates = []
        gparams = T.grad(train_cost, params)

        for delta, param, gparam in zip(deltas, params, gparams):

            train_updates += self.learning_method.update(delta, gparam)

            # applying max_col_norm regularisation
            if param.name[0] == 'W' and self.learning_rule.max_col_norm:
                W_update = param + delta
                w_len = T.sqrt((W_update**2).sum(axis=0))
                divisor = (w_len <= self.learning_rule.max_col_norm) + \
                        (w_len > self.learning_rule.max_col_norm) * w_len / \
                        self.learning_rule.max_col_norm
                W_update = W_update / divisor.reshape((1, divisor.shape[0]))
                train_updates += [(param, W_update)]

            else:
                train_updates += [(param, param + delta)]

        #----[ append updates of stats from each layer to train updates ]-----#

        self.train_stats_names, train_stats_vars = split_list(
            train_layers_stats)
        train_stats_vars = [var.astype(floatX) for var in train_stats_vars]
        self.train_stats_shared = generate_shared_list(train_stats_vars)
        train_stats_updates = merge_lists(self.train_stats_shared,
                                          train_stats_vars)
        train_updates += train_stats_updates

        #-------------------------[ train functions ]-------------------------#

        self.log.info('..begin compiling functions')

        train_stopping_cost = self.learning_rule.stopping_criteria[
            'cost'].get_cost(train_y, train_y_pred)

        self.training = theano.function(inputs=[train_x, train_y],
                                        outputs=(train_stopping_cost,
                                                 train_cost),
                                        updates=train_updates,
                                        on_unused_input='warn',
                                        allow_input_downcast=True)

        self.log.info('..training function compiled')

        #======================[ testing params updates ]=====================#

        test_x = T.matrix('test_x', dtype=floatX)
        test_y = T.matrix('test_y', dtype=floatX)
        test_y_pred, test_layers_stats = self.model.test_fprop(test_x)

        #-----[ append updates of stats from each layer to test updates ]-----#

        self.test_stats_names, test_stats_vars = split_list(test_layers_stats)
        test_stats_vars = [var.astype(floatX) for var in test_stats_vars]
        self.test_stats_shared = generate_shared_list(test_stats_vars)
        test_stats_updates = merge_lists(self.test_stats_shared,
                                         test_stats_vars)

        #-------------------------[ test functions ]--------------------------#

        test_stopping_cost = self.learning_rule.stopping_criteria[
            'cost'].get_cost(test_y, test_y_pred)
        test_cost = self.learning_rule.cost.get_cost(test_y, test_y_pred)

        self.testing = theano.function(inputs=[test_x, test_y],
                                       outputs=(test_stopping_cost, test_cost),
                                       updates=test_stats_updates,
                                       on_unused_input='warn',
                                       allow_input_downcast=True)

        self.log.info('..testing function compiled')

    def run(self):

        best_train_error = float(sys.maxint)
        best_valid_error = float(sys.maxint)
        # best_test_error = float(sys.maxint)

        mean_train_error = float(sys.maxint)
        mean_valid_error = float(sys.maxint)
        mean_test_error = float(sys.maxint)

        mean_train_cost = float(sys.maxint)
        mean_valid_cost = float(sys.maxint)
        mean_test_cost = float(sys.maxint)

        best_train_cost = float(sys.maxint)

        train_stats_values = []
        valid_stats_values = []
        # test_stats_values = []

        epoch = 0
        error_dcr = 0
        self.best_epoch_last_update = 0
        self.best_valid_last_update = float(sys.maxint)

        train_stats_names = [
            'train_' + name for name in self.train_stats_names
        ]
        valid_stats_names = ['valid_' + name for name in self.test_stats_names]
        # test_stats_names = ['test_' + name for name in self.test_stats_names]

        job_start = time.time()

        while (self.continue_learning(epoch, error_dcr, best_valid_error)):

            if epoch > 0:
                self.log.info("best_epoch_last_update: %d" %
                              self.best_epoch_last_update)
                self.log.info("valid_error_decrease: %f" % error_dcr)
                self.log.info("best_valid_last_update: %f" %
                              self.best_valid_last_update)
                self.log.info("========[ End of Epoch ]========\n\n")

            epoch += 1

            start_time = time.time()

            num_train_examples = 0
            total_train_cost = 0.
            total_train_stopping_cost = 0.
            train_stats_values = np.zeros(len(train_stats_names), dtype=floatX)

            num_valid_examples = 0
            total_valid_cost = 0.
            total_valid_stopping_cost = 0.
            valid_stats_values = np.zeros(len(valid_stats_names), dtype=floatX)

            num_test_examples = 0
            total_test_cost = 0.
            total_test_stopping_cost = 0.
            # test_stats_values = np.zeros(len(test_stats_names), dtype=floatX)

            blk = 0

            for block in self.dataset:
                block_time = time.time()
                blk += 1

                train_set = block.get_train()
                valid_set = block.get_valid()
                test_set = block.get_test()

                #====================[ Training Progress ]====================#
                if train_set.dataset_size() > 0:

                    self.log.info('..training ' +
                                  self.dataset.__class__.__name__ +
                                  ' block %s/%s' %
                                  (blk, self.dataset.nblocks()))

                    for idx in train_set:
                        stopping_cost, cost = self.training(
                            train_set.X[idx], train_set.y[idx])
                        total_train_cost += cost * len(idx)
                        total_train_stopping_cost += stopping_cost * len(idx)
                        num_train_examples += len(idx)
                        train_stats_values += len(idx) * get_shared_values(
                            self.train_stats_shared)

                    #-------[ Update train best cost and error values ]-------#
                    mean_train_error = total_train_stopping_cost / num_train_examples
                    mean_train_cost = total_train_cost / num_train_examples
                    train_stats_values /= num_train_examples

                    if mean_train_error < best_train_error:
                        best_train_error = mean_train_error

                    if mean_train_cost < 0.999 * best_train_cost:
                        best_train_cost = mean_train_cost
                    else:
                        self.log.info(
                            'training cost is not improving after epoch %d' %
                            epoch)
                        #---[ Use learning rate decay if the error does not improve after 1 epoch ]---#
                        if self.learning_rule.learning_rate_decay_factor and hasattr(
                                self.learning_method, 'learning_rate'):
                            self.log.info(
                                'decay learning_rate by factor %.3f' %
                                self.learning_rule.learning_rate_decay_factor)
                            new_lr = self.learning_method.learning_rate.get_value(return_internal_type=True) \
                                    / self.learning_rule.learning_rate_decay_factor
                            self.log.info('new learning rate %.3f' % new_lr)
                            self.learning_method.learning_rate.set_value(
                                new_lr)

                #===================[ Validating Progress ]===================#
                if valid_set.dataset_size() > 0:

                    self.log.info('..validating ' +
                                  self.dataset.__class__.__name__ +
                                  ' block %s/%s' %
                                  (blk, self.dataset.nblocks()))

                    for idx in valid_set:
                        stopping_cost, cost = self.testing(
                            valid_set.X[idx], valid_set.y[idx])
                        total_valid_cost += cost * len(idx)
                        total_valid_stopping_cost += stopping_cost * len(idx)
                        num_valid_examples += len(idx)
                        valid_stats_values += len(idx) * get_shared_values(
                            self.test_stats_shared)

                    #-------[ Update valid best cost and error values ]-------#
                    mean_valid_error = total_valid_stopping_cost / num_valid_examples
                    mean_valid_cost = total_valid_cost / num_valid_examples
                    valid_stats_values /= num_valid_examples

                    if mean_valid_error < best_valid_error:
                        best_valid_error = mean_valid_error
                        self.log.info('..best validation error so far')
                        if self.log.save_model:
                            self.log._save_model(self.model)
                            self.log.info('..model saved')

                        if self.log.save_learning_rule:
                            self.log._save_learning_rule(self.learning_rule)
                            self.log.info('..learning rule saved')

                    if mean_valid_error < self.best_valid_last_update:
                        error_dcr = self.best_valid_last_update - mean_valid_error
                    else:
                        error_dcr = 0

                #====================[ Testing Progress ]=====================#
                if test_set.dataset_size() > 0:

                    self.log.info('..testing ' +
                                  self.dataset.__class__.__name__ +
                                  ' block %s/%s' %
                                  (blk, self.dataset.nblocks()))

                    for idx in test_set:
                        stopping_cost, cost = self.testing(
                            test_set.X[idx], test_set.y[idx])
                        total_test_cost += cost * len(idx)
                        total_test_stopping_cost += stopping_cost * len(idx)
                        num_test_examples += len(idx)

                    #-------[ Update test best cost and error values ]--------#
                    mean_test_error = total_test_stopping_cost / num_test_examples
                    mean_test_cost = total_test_cost / num_test_examples

                self.log.info('block time: %0.2fs' %
                              (time.time() - block_time))
                self.log.info(get_mem_usage())

            #==============[ save to database, save epoch error]==============#
            if self.log.save_to_database:
                self.log._save_to_database(epoch, best_train_error,
                                           best_valid_error, mean_test_error)
                self.log.info('..sent to database: %s:%s' %
                              (self.log.save_to_database['name'],
                               self.log.experiment_name))

            if self.log.save_epoch_error:
                self.log._save_epoch_error(epoch, mean_train_error,
                                           mean_valid_error, mean_test_error)
                self.log.info('..epoch error saved')

            end_time = time.time()

            #=====================[ log outputs to file ]=====================#

            merged_train = merge_lists(train_stats_names, train_stats_values)
            merged_valid = merge_lists(valid_stats_names, valid_stats_values)
            # merged_test = merge_lists(test_stats_names, test_stats_values)

            stopping_cost_type = self.learning_rule.stopping_criteria[
                'cost'].type
            outputs = [
                ('epoch', epoch), ('runtime(s)', int(end_time - start_time)),
                ('mean_train_cost_' + self.learning_rule.cost.type,
                 mean_train_cost),
                ('mean_train_error_' + stopping_cost_type, mean_train_error),
                ('best_train_error_' + stopping_cost_type, best_train_error),
                ('mean_valid_cost_' + self.learning_rule.cost.type,
                 mean_valid_cost),
                ('mean_valid_error_' + stopping_cost_type, mean_valid_error),
                ('best_valid_error_' + stopping_cost_type, best_valid_error),
                ('mean_test_cost_' + self.learning_rule.cost.type,
                 mean_test_cost),
                ('mean_test_error_' + stopping_cost_type, mean_test_error)
            ]

            # outputs += merged_train + merged_valid + merged_test
            outputs += merged_train + merged_valid
            self.log._log_outputs(outputs)

        job_end = time.time()
        self.log.info(
            'Job Completed on %s' %
            time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime(job_end)))
        ttl_time = int(job_end - job_start)
        dt = datetime.timedelta(seconds=ttl_time)
        self.log.info('Total Time Taken: %s' % str(dt))
        self.log.info("========[ End of Job ]========\n\n")

    def continue_learning(self, epoch, error_dcr, best_valid_error):

        if epoch > self.learning_rule.stopping_criteria['max_epoch']:
            return False

        elif self.learning_rule.stopping_criteria['percent_decrease'] is None or \
            self.learning_rule.stopping_criteria['epoch_look_back'] is None:
            return True

        elif np.abs(error_dcr * 1.0 / self.best_valid_last_update) \
            >= self.learning_rule.stopping_criteria['percent_decrease']:
            self.best_valid_last_update = best_valid_error
            self.best_epoch_last_update = epoch
            return True

        elif epoch - self.best_epoch_last_update > \
            self.learning_rule.stopping_criteria['epoch_look_back']:
            return False

        else:
            return True
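
The UPDATES rules in the class docstring can be made concrete with a small NumPy sketch; the scalar quadratic cost and its gradient here are illustrative assumptions, only the two update rules follow the docstring.

import numpy as np

grad = lambda p: p  # gradient of the illustrative cost 0.5 * p**2

def normal_momentum_step(param, delta, momentum=0.9, learning_rate=0.1):
    delta = momentum * delta - learning_rate * grad(param)
    return param + delta, delta

def nesterov_momentum_step(param, delta, momentum=0.9, learning_rate=0.1):
    # gradient is taken at the look-ahead point param + momentum * delta
    delta = momentum * delta - learning_rate * grad(param + momentum * delta)
    return param + delta, delta

param, delta = np.array([1.0]), np.zeros(1)
for _ in range(10):
    param, delta = nesterov_momentum_step(param, delta)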
Example #6
class TrainObject():

    '''
    UPDATES:
        (Normal momentum)
        delta := momentum * delta - learning_rate * (d cost(param) / d param)
        param := param + delta

        (Nesterov momentum)
        delta := momentum * delta - learning_rate * (d cost(param + momentum*delta) / d param)
        param := param + delta
    '''

    def __init__(self, model, dataset, learning_rule, learning_method, log=None):
        self.model = model
        self.dataset = dataset
        self.learning_rule = learning_rule
        self.learning_method = learning_method
        self.log = log

        if self.log is None:
            # use default Log setting
            self.log = Log(logger=internal_logger)

        elif self.log.save_to_database:
            self.log.print_records()
            print('\n')

        self.log.info('..begin setting up train object')
        self.setup()

    def setup(self):


        #================[ check output dim with target size ]================#

        assert self.model.input_dim == self.dataset.feature_size(), \
                'input dim: ' + str(self.model.input_dim) + \
                ', is not equal to feature size: ' + str(self.dataset.feature_size())

        assert self.model.layers[-1].dim == self.dataset.target_size(), \
                'output dim: ' + str(self.model.layers[-1].dim) + \
                ', is not equal to target size: ' + str(self.dataset.target_size())

        #===================[ build params and deltas list ]==================#

        def is_shared_var(var):
            return var.__class__.__name__ == 'TensorSharedVariable' or \
                    var.__class__.__name__ == 'CudaNdarraySharedVariable'


        params = []
        deltas = []

        prev_layer_dim = self.model.input_dim
        for layer in self.model.layers:

            # append layer params that will be updated during training
            if len(layer.params) > 0:
                for param in layer.params:
                    params += [param]
                    deltas += [theano.shared(np.zeros(param.shape.eval(), dtype=floatX))]

            if is_shared_var(layer.W):
                assert layer.W.dtype == floatX
                params += [layer.W]
                deltas += [theano.shared(np.zeros((prev_layer_dim, layer.dim), dtype=floatX))]

            else:
                self.log.info(layer.W.name + ' is ' + layer.W.__class__.__name__ +
                            ' but not SharedVariable.')

            if is_shared_var(layer.b):
                assert layer.b.dtype == floatX
                params += [layer.b]
                deltas += [theano.shared(np.zeros(layer.dim, dtype=floatX))]

            else:
                self.log.info(layer.b.name + ' is ' + layer.b.__class__.__name__ +
                            ' but not SharedVariable.')

            prev_layer_dim = layer.dim


        #=====================[ training params updates ]=====================#

        self.log.info("..update params: " + str(params))

        train_x = T.matrix('train_x', dtype=floatX)
        train_y = T.matrix('train_y', dtype=floatX)

        train_y_pred, train_layers_stats = self.model.train_fprop(train_x)
        train_cost = self.learning_rule.cost.get_cost(train_y, train_y_pred)

        if self.learning_rule.L1_lambda:
            self.log.info('..applying L1_lambda: %f'%self.learning_rule.L1_lambda)
            L1 = theano.shared(0.)
            for layer in self.model.layers:
                if is_shared_var(layer.W):
                    L1 += T.sqrt((layer.W ** 2).sum(axis=0)).sum()

                else:
                    self.log.info(layer.W.name + ' is ' + layer.W.__class__.__name__ +
                        ' and is not used in L1 regularization')
            train_cost += self.learning_rule.L1_lambda * L1

        if self.learning_rule.L2_lambda:
            self.log.info('..applying L2_lambda: %f'%self.learning_rule.L2_lambda)
            L2 = theano.shared(0.)
            for layer in self.model.layers:
                if is_shared_var(layer.W):
                    L2 += ((layer.W ** 2).sum(axis=0)).sum()

                else:
                    self.log.info(layer.W.name + ' is ' + layer.W.__class__.__name__ +
                        ' and is not used in L2 regularization')
            train_cost += self.learning_rule.L2_lambda * L2

        train_updates = []
        gparams = T.grad(train_cost, params)

        for delta, param, gparam in zip(deltas, params, gparams):

            train_updates += self.learning_method.update(delta, gparam)

            # applying max_col_norm regularisation
            if param.name[0] == 'W' and self.learning_rule.max_col_norm:
                W_update = param + delta
                w_len = T.sqrt((W_update ** 2).sum(axis=0))
                divisor = (w_len <= self.learning_rule.max_col_norm) + \
                        (w_len > self.learning_rule.max_col_norm) * w_len / \
                        self.learning_rule.max_col_norm
                W_update = W_update / divisor.reshape((1, divisor.shape[0]))
                train_updates += [(param, W_update)]

            else:
                train_updates += [(param, param + delta)]

        #----[ append updates of stats from each layer to train updates ]-----#

        self.train_stats_names, train_stats_vars = split_list(train_layers_stats)
        train_stats_vars = [var.astype(floatX) for var in train_stats_vars]
        self.train_stats_shared = generate_shared_list(train_stats_vars)
        train_stats_updates = merge_lists(self.train_stats_shared, train_stats_vars)
        train_updates += train_stats_updates

        #-------------------------[ train functions ]-------------------------#

        self.log.info('..begin compiling functions')

        train_stopping_cost = self.learning_rule.stopping_criteria['cost'].get_cost(train_y, train_y_pred)

        self.training = theano.function(inputs=[train_x, train_y],
                                        outputs=(train_stopping_cost, train_cost),
                                        updates=train_updates,
                                        on_unused_input='warn',
                                        allow_input_downcast=True)

        self.log.info('..training function compiled')

        #======================[ testing params updates ]=====================#

        test_x = T.matrix('test_x', dtype=floatX)
        test_y = T.matrix('test_y', dtype=floatX)
        test_y_pred, test_layers_stats = self.model.test_fprop(test_x)

        #-----[ append updates of stats from each layer to test updates ]-----#

        self.test_stats_names, test_stats_vars = split_list(test_layers_stats)
        test_stats_vars = [var.astype(floatX) for var in test_stats_vars]
        self.test_stats_shared = generate_shared_list(test_stats_vars)
        test_stats_updates = merge_lists(self.test_stats_shared, test_stats_vars)

        #-------------------------[ test functions ]--------------------------#

        test_stopping_cost = self.learning_rule.stopping_criteria['cost'].get_cost(test_y, test_y_pred)
        test_cost = self.learning_rule.cost.get_cost(test_y, test_y_pred)

        self.testing = theano.function(inputs=[test_x, test_y],
                                        outputs=(test_stopping_cost, test_cost),
                                        updates=test_stats_updates,
                                        on_unused_input='warn',
                                        allow_input_downcast=True)

        self.log.info('..testing function compiled')


    def run(self):

        best_train_error = float(sys.maxint)
        best_valid_error = float(sys.maxint)
        # best_test_error = float(sys.maxint)

        mean_train_error = float(sys.maxint)
        mean_valid_error = float(sys.maxint)
        mean_test_error = float(sys.maxint)

        mean_train_cost = float(sys.maxint)
        mean_valid_cost = float(sys.maxint)
        mean_test_cost = float(sys.maxint)

        best_train_cost = float(sys.maxint)

        train_stats_values = []
        valid_stats_values = []
        # test_stats_values = []

        epoch = 0
        error_dcr = 0
        self.best_epoch_last_update = 0
        self.best_valid_last_update = float(sys.maxint)

        train_stats_names = ['train_' + name for name in self.train_stats_names]
        valid_stats_names = ['valid_' + name for name in self.test_stats_names]
        # test_stats_names = ['test_' + name for name in self.test_stats_names]

        job_start = time.time()

        while (self.continue_learning(epoch, error_dcr, best_valid_error)):

            if epoch > 0:
                self.log.info("best_epoch_last_update: %d"%self.best_epoch_last_update)
                self.log.info("valid_error_decrease: %f"%error_dcr)
                self.log.info("best_valid_last_update: %f"%self.best_valid_last_update)
                self.log.info("========[ End of Epoch ]========\n\n")

            epoch += 1

            start_time = time.time()

            num_train_examples = 0
            total_train_cost = 0.
            total_train_stopping_cost = 0.
            train_stats_values = np.zeros(len(train_stats_names), dtype=floatX)

            num_valid_examples = 0
            total_valid_cost = 0.
            total_valid_stopping_cost = 0.
            valid_stats_values = np.zeros(len(valid_stats_names), dtype=floatX)

            num_test_examples = 0
            total_test_cost = 0.
            total_test_stopping_cost = 0.
            # test_stats_values = np.zeros(len(test_stats_names), dtype=floatX)

            blk = 0

            for block in self.dataset:
                block_time = time.time()
                blk += 1

                train_set = block.get_train()
                valid_set = block.get_valid()
                test_set = block.get_test()


                #====================[ Training Progress ]====================#
                if train_set.dataset_size() > 0:

                    self.log.info('..training '+ self.dataset.__class__.__name__
                                + ' block %s/%s'%(blk, self.dataset.nblocks()))

                    for idx in train_set:
                        stopping_cost, cost = self.training(train_set.X[idx], train_set.y[idx])
                        total_train_cost += cost * len(idx)
                        total_train_stopping_cost += stopping_cost * len(idx)
                        num_train_examples += len(idx)
                        train_stats_values += len(idx) * get_shared_values(self.train_stats_shared)

                    #-------[ Update train best cost and error values ]-------#
                    mean_train_error = total_train_stopping_cost / num_train_examples
                    mean_train_cost = total_train_cost / num_train_examples
                    train_stats_values /= num_train_examples

                    if mean_train_error < best_train_error:
                        best_train_error = mean_train_error

                    if mean_train_cost < 0.999 * best_train_cost:
                        best_train_cost = mean_train_cost
                    else:
                        self.log.info('training cost is not improving after epoch %d' % epoch)
                        #---[ Use learning rate decay if the error does not improve after 1 epoch ]---#
                        if self.learning_rule.learning_rate_decay_factor and hasattr(self.learning_method, 'learning_rate'):
                            self.log.info('decay learning_rate by factor %.3f'%self.learning_rule.learning_rate_decay_factor)
                            new_lr = self.learning_method.learning_rate.get_value(return_internal_type=True) \
                                    / self.learning_rule.learning_rate_decay_factor
                            self.log.info('new learning rate %.3f'%new_lr)
                            self.learning_method.learning_rate.set_value(new_lr)


                #===================[ Validating Progress ]===================#
                if valid_set.dataset_size() > 0:

                    self.log.info('..validating ' + self.dataset.__class__.__name__
                                + ' block %s/%s'%(blk, self.dataset.nblocks()))

                    for idx in valid_set:
                        stopping_cost, cost = self.testing(valid_set.X[idx], valid_set.y[idx])
                        total_valid_cost += cost * len(idx)
                        total_valid_stopping_cost += stopping_cost * len(idx)
                        num_valid_examples += len(idx)
                        valid_stats_values += len(idx) * get_shared_values(self.test_stats_shared)

                    #-------[ Update valid best cost and error values ]-------#
                    mean_valid_error = total_valid_stopping_cost / num_valid_examples
                    mean_valid_cost = total_valid_cost / num_valid_examples
                    valid_stats_values /= num_valid_examples

                    if mean_valid_error < best_valid_error:
                        best_valid_error = mean_valid_error
                        self.log.info('..best validation error so far')
                        if self.log.save_model:
                            self.log._save_model(self.model)
                            self.log.info('..model saved')

                        if self.log.save_learning_rule:
                            self.log._save_learning_rule(self.learning_rule)
                            self.log.info('..learning rule saved')

                    if mean_valid_error < self.best_valid_last_update:
                        error_dcr = self.best_valid_last_update - mean_valid_error
                    else:
                        error_dcr = 0


                #====================[ Testing Progress ]=====================#
                if test_set.dataset_size() > 0:

                    self.log.info('..testing ' + self.dataset.__class__.__name__
                                + ' block %s/%s'%(blk, self.dataset.nblocks()))

                    for idx in test_set:
                        stopping_cost, cost = self.testing(test_set.X[idx], test_set.y[idx])
                        total_test_cost += cost * len(idx)
                        total_test_stopping_cost += stopping_cost * len(idx)
                        num_test_examples += len(idx)

                    #-------[ Update test best cost and error values ]--------#
                    mean_test_error = total_test_stopping_cost / num_test_examples
                    mean_test_cost = total_test_cost / num_test_examples

                self.log.info('block time: %0.2fs'%(time.time()-block_time))
                self.log.info(get_mem_usage())


            #==============[ save to database, save epoch error]==============#
            if self.log.save_to_database:
                self.log._save_to_database(epoch, best_train_error, best_valid_error, mean_test_error)
                self.log.info('..sent to database: %s:%s' % (self.log.save_to_database['name'],
                                                        self.log.experiment_name))

            if self.log.save_epoch_error:
                self.log._save_epoch_error(epoch, mean_train_error, mean_valid_error, mean_test_error)
                self.log.info('..epoch error saved')

            end_time = time.time()

            #=====================[ log outputs to file ]=====================#

            merged_train = merge_lists(train_stats_names, train_stats_values)
            merged_valid = merge_lists(valid_stats_names, valid_stats_values)
            # merged_test = merge_lists(test_stats_names, test_stats_values)

            stopping_cost_type = self.learning_rule.stopping_criteria['cost'].type
            outputs = [('epoch', epoch),
                        ('runtime(s)', int(end_time-start_time)),
                        ('mean_train_cost_' + self.learning_rule.cost.type, mean_train_cost),
                        ('mean_train_error_' + stopping_cost_type, mean_train_error),
                        ('best_train_error_' + stopping_cost_type, best_train_error),
                        ('mean_valid_cost_' + self.learning_rule.cost.type, mean_valid_cost),
                        ('mean_valid_error_' + stopping_cost_type, mean_valid_error),
                        ('best_valid_error_' + stopping_cost_type, best_valid_error),
                        ('mean_test_cost_' + self.learning_rule.cost.type, mean_test_cost),
                        ('mean_test_error_' + stopping_cost_type, mean_test_error)]

            # outputs += merged_train + merged_valid + merged_test
            outputs += merged_train + merged_valid
            self.log._log_outputs(outputs)


        job_end = time.time()
        self.log.info('Job Completed on %s'%time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime(job_end)))
        ttl_time = int(job_end - job_start)
        dt = datetime.timedelta(seconds=ttl_time)
        self.log.info('Total Time Taken: %s'%str(dt))
        self.log.info("========[ End of Job ]========\n\n")


    def continue_learning(self, epoch, error_dcr, best_valid_error):

        if epoch > self.learning_rule.stopping_criteria['max_epoch']:
            return False

        elif self.learning_rule.stopping_criteria['percent_decrease'] is None or \
            self.learning_rule.stopping_criteria['epoch_look_back'] is None:
            return True

        elif np.abs(error_dcr * 1.0 / self.best_valid_last_update) \
            >= self.learning_rule.stopping_criteria['percent_decrease']:
            self.best_valid_last_update = best_valid_error
            self.best_epoch_last_update = epoch
            return True

        elif epoch - self.best_epoch_last_update > \
            self.learning_rule.stopping_criteria['epoch_look_back']:
            return False

        else:
            return True
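
The max_col_norm branch inside setup() rescales any weight column whose L2 norm exceeds the limit. Here is a hedged NumPy sketch of the same divisor arithmetic (the random weight matrix is purely illustrative).

import numpy as np

def clip_col_norm(W, max_col_norm):
    # mirrors the divisor expression in setup(): columns within the limit are
    # divided by 1, longer columns are scaled so their norm equals the limit
    w_len = np.sqrt((W ** 2).sum(axis=0))
    divisor = (w_len <= max_col_norm) + \
              (w_len > max_col_norm) * w_len / max_col_norm
    return W / divisor.reshape((1, divisor.shape[0]))

W = np.random.randn(20, 5) * 10.0  # illustrative weight matrix
W_clipped = clip_col_norm(W, max_col_norm=10)
assert np.all(np.sqrt((W_clipped ** 2).sum(axis=0)) <= 10 + 1e-6)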
Example #7
def mlp():

    # build dataset
    data = Mnist(preprocessor=None, train_valid_test_ratio=[5, 1, 1])

    # build mlp
    mlp = MLP(input_dim=data.feature_size())

    W1 = GaussianWeight(prev_dim=mlp.input_dim, this_dim=1000)
    hidden1 = PRELU(dim=1000,
                    name='h1_layer',
                    W=W1(mean=0, std=0.1),
                    b=None,
                    dropout_below=None)

    mlp.add_layer(hidden1)

    W2 = XavierWeight(prev_dim=hidden1.dim, this_dim=data.target_size())
    output = Softmax(dim=data.target_size(),
                     name='output_layer',
                     W=W2(),
                     b=None,
                     dropout_below=None)

    mlp.add_layer(output)

    # build learning method
    learning_method = AdaGrad(learning_rate=0.1, momentum=0.9)

    # set the learning rules
    learning_rule = LearningRule(max_col_norm=10,
                                 L1_lambda=None,
                                 L2_lambda=None,
                                 training_cost=Cost(type='mse'),
                                 learning_rate_decay_factor=None,
                                 stopping_criteria={
                                     'max_epoch': 300,
                                     'epoch_look_back': 10,
                                     'cost': Cost(type='error'),
                                     'percent_decrease': 0.01
                                 })

    # (optional) build the logging object
    log = Log(experiment_name='mnist',
              description='This is a tutorial example',
              save_outputs=True,
              save_learning_rule=True,
              save_model=True,
              save_epoch_error=True,
              save_to_database={
                  'name': 'Example.db',
                  'records': {
                      'Dataset': data.__class__.__name__,
                      'max_col_norm': learning_rule.max_col_norm,
                      'Weight_Init_Seed': mlp.rand_seed,
                      'Dropout_Below': str([layer.dropout_below for layer in mlp.layers]),
                      'Batch_Size': data.batch_size,
                      'Layer_Dim': str([layer.dim for layer in mlp.layers]),
                      'Layer_Types': str([layer.__class__.__name__ for layer in mlp.layers]),
                      'Preprocessor': data.preprocessor.__class__.__name__,
                      'Learning_Rate': learning_method.learning_rate,
                      'Momentum': learning_method.momentum,
                      'Training_Cost': learning_rule.cost.type,
                      'Stopping_Cost': learning_rule.stopping_criteria['cost'].type
                  }
              })  # end log

    # put everything into the train object
    train_object = TrainObject(model=mlp,
                               dataset=data,
                               learning_rule=learning_rule,
                               learning_method=learning_method,
                               log=log)
    # finally run the code
    train_object.run()
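
To run this tutorial as a script, the example assumes the library's Mnist, MLP, layer, weight-initialiser, AdaGrad, LearningRule, Cost, Log and TrainObject classes are imported from their respective modules (the exact import paths are not shown here); a minimal entry point would then be:

if __name__ == '__main__':
    mlp()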