Example #1
    def __init__(self, network, config=None):
        """
        Basic neural network trainer.
        :type network: deepy.NeuralNetwork
        :type config: deepy.conf.TrainerConfig
        :return:
        """
        super(NeuralTrainer, self).__init__()

        self.config = None
        if isinstance(config, TrainerConfig):
            self.config = config
        elif isinstance(config, dict):
            self.config = TrainerConfig(config)
        else:
            self.config = TrainerConfig()
        # Model and network all refer to the computational graph
        self.model = self.network = network

        self.network.prepare_training()
        self._setup_costs()

        self.evaluation_func = None

        self.validation_frequency = self.config.validation_frequency
        self.min_improvement = self.config.min_improvement
        self.patience = self.config.patience
        self._iter_callbacks = []

        self.best_cost = 1e100
        self.best_iter = 0
        self.best_params = self.copy_params()
        self._skip_batches = 0
        self._progress = 0
        self.last_cost = 0
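
The constructor above accepts config as a TrainerConfig instance, a plain dict, or None. A minimal usage sketch of the three forms, assuming SGDTrainer (which appears in the later examples) and an illustrative learning_rate value:

from deepy import SGDTrainer
from deepy.conf import TrainerConfig

trainer_a = SGDTrainer(network)                                    # falls back to a default TrainerConfig
trainer_b = SGDTrainer(network, {"learning_rate": 0.01})           # a dict is wrapped into a TrainerConfig
trainer_c = SGDTrainer(network, TrainerConfig({"learning_rate": 0.01}))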
Example #2
File: base.py Project: JunjieHu/deepy
    def __init__(self, network, config=None):
        """
        Basic neural network trainer.
        :type network: deepy.NeuralNetwork
        :type config: deepy.conf.TrainerConfig
        :return:
        """
        super(NeuralTrainer, self).__init__()

        self.config = None
        if isinstance(config, TrainerConfig):
            self.config = config
        elif isinstance(config, dict):
            self.config = TrainerConfig(config)
        else:
            self.config = TrainerConfig()
        # Model and network all refer to the computational graph
        self.model = self.network = network

        self.network.prepare_training()
        self._setup_costs()

        self.evaluation_func = None

        self.validation_frequency = self.config.validation_frequency
        self.min_improvement = self.config.min_improvement
        self.patience = self.config.patience
        self._iter_callbacks = []

        self.best_cost = 1e100
        self.best_iter = 0
        self.best_params = self.copy_params()
        self._skip_batches = 0
        self._progress = 0
        self.last_cost = 0
Example #3
File: trainers.py Project: 52nlp/deepy
    def __init__(self, network, config=None, method=None):

        if method:
            logging.info("changing optimization method to '%s'" % method)
            if not config:
                config = TrainerConfig()
            elif isinstance(config, dict):
                config = TrainerConfig(config)
            config.method = method

        super(GeneralNeuralTrainer, self).__init__(network, config)

        logging.info('compiling %s learning function', self.__class__.__name__)

        network_updates = list(network.updates) + list(network.training_updates)
        learning_updates = list(self.learning_updates())
        update_list = network_updates + learning_updates
        logging.info("network updates: %s" % " ".join(map(str, [x[0] for x in network_updates])))
        logging.info("learning updates: %s" % " ".join(map(str, [x[0] for x in learning_updates])))

        self.learning_func = theano.function(
            network.input_variables + network.target_variables,
            map(lambda v: theano.Out(v, borrow=True), self.training_variables),
            updates=update_list, allow_input_downcast=True,
            mode=self.config.get("theano_mode", None))
Example #4
File: trainers.py Project: 52nlp/deepy
    def __init__(self, network, config=None):
        """
        Basic neural network trainer.
        :type network: deepy.NeuralNetwork
        :type config: deepy.conf.TrainerConfig
        :return:
        """
        super(NeuralTrainer, self).__init__()

        self.config = None
        if isinstance(config, TrainerConfig):
            self.config = config
        elif isinstance(config, dict):
            self.config = TrainerConfig(config)
        else:
            self.config = TrainerConfig()
        self.network = network

        self.network.prepare_training()

        self._setup_costs()

        logging.info("compile evaluation function")
        self.evaluation_func = theano.function(
            network.input_variables + network.target_variables, self.evaluation_variables, updates=network.updates,
            allow_input_downcast=True, mode=self.config.get("theano_mode", None))
        self.learning_func = None

        self.validation_frequency = self.config.validation_frequency
        self.min_improvement = self.config.min_improvement
        self.patience = self.config.patience

        self.best_cost = 1e100
        self.best_iter = 0
        self.best_params = self._copy_network_params()
Example #5
    def __init__(self, network, method=None, config=None, annealer=None, validator=None):

        if method:
            logging.info("changing optimization method to '%s'" % method)
            if not config:
                config = TrainerConfig()
            elif isinstance(config, dict):
                config = TrainerConfig(config)
            config.method = method

        super(GeneralNeuralTrainer, self).__init__(network, config, annealer=annealer, validator=validator)

        self._learning_func = None
Example #6
    def __init__(self, network, config=None, method=None):

        if method:
            logging.info("changing optimization method to '%s'" % method)
            if not config:
                config = TrainerConfig()
            elif isinstance(config, dict):
                config = TrainerConfig(config)
            config.method = method

        super(GeneralNeuralTrainer, self).__init__(network, config)

        self._learning_func = None
Example #7
 def __init__(self, state_num, action_num, experience_replay=True):
     self.state_num = state_num
     self.action_num = action_num
     self.experience_replay = experience_replay
     self.experience_pool = []
     self.model = get_model(state_num, action_num)
     train_conf = TrainerConfig()
     train_conf.learning_rate = LEARNING_RATE
     train_conf.weight_l2 = 0
     self.trainer = SGDTrainer(self.model, train_conf)
     self.trainer.training_names = []
     self.trainer.training_variables = []
     self.thread_lock = threading.Lock()
     self.epsilon = EPSILON
     self.tick = 0
Example #8
File: agent.py Project: JunjieHu/deepy
 def __init__(self, state_num, action_num, experience_replay=True):
     self.state_num = state_num
     self.action_num = action_num
     self.experience_replay = experience_replay
     self.experience_pool = []
     self.model = get_model(state_num, action_num)
     train_conf = TrainerConfig()
     train_conf.learning_rate = LEARNING_RATE
     train_conf.weight_l2 = 0
     self.trainer = SGDTrainer(self.model, train_conf)
     self.trainer.training_names = []
     self.trainer.training_variables = []
     self.thread_lock = threading.Lock()
     self.epsilon = EPSILON
     self.tick = 0
Example #9
    def __init__(self, network, config=None):
        """
        Basic neural network trainer.
        :type network: deepy.NeuralNetwork
        :type config: deepy.conf.TrainerConfig
        :return:
        """
        super(NeuralTrainer, self).__init__()

        self.config = None
        if isinstance(config, TrainerConfig):
            self.config = config
        elif isinstance(config, dict):
            self.config = TrainerConfig(config)
        else:
            self.config = TrainerConfig()
        self.network = network

        self.network.prepare_training()

        self._setup_costs()

        logging.info("compile evaluation function")
        self.evaluation_func = theano.function(
            network.input_variables + network.target_variables,
            self.evaluation_variables,
            updates=network.updates,
            allow_input_downcast=True,
            mode=self.config.get("theano_mode", None))
        self.learning_func = None

        self.validation_frequency = self.config.validation_frequency
        self.min_improvement = self.config.min_improvement
        self.patience = self.config.patience

        self.best_cost = 1e100
        self.best_iter = 0
        self.best_params = self._copy_network_params()
        self._skip_batches = 0
        self._progress = 0
Example #10
    def __init__(self, network, config=None, method=None):

        if method:
            logging.info("changing optimization method to '%s'" % method)
            if not config:
                config = TrainerConfig()
            config.method = method

        super(GeneralNeuralTrainer, self).__init__(network, config)

        logging.info('compiling %s learning function', self.__class__.__name__)

        network_updates = list(network.updates) + list(network.training_updates)
        learning_updates = list(self.learning_updates())
        update_list = network_updates + learning_updates
        logging.info("network updates: %s" % " ".join(map(str, [x[0] for x in network_updates])))
        logging.info("learning updates: %s" % " ".join(map(str, [x[0] for x in learning_updates])))

        self.learning_func = theano.function(
            network.input_variables + network.target_variables,
            self.training_variables,
            updates=update_list, allow_input_downcast=True,
            mode=config.get("theano_mode", theano.Mode(linker=THEANO_LINKER)))
Example #11
    def __init__(self, network, config=None, method=None):

        if method:
            logging.info("changing optimization method to '%s'" % method)
            if not config:
                config = TrainerConfig()
            elif isinstance(config, dict):
                config = TrainerConfig(config)
            config.method = method

        super(GeneralNeuralTrainer, self).__init__(network, config)

        logging.info('compiling %s learning function', self.__class__.__name__)

        network_updates = list(network.updates) + list(
            network.training_updates)
        learning_updates = list(self.learning_updates())
        update_list = network_updates + learning_updates
        logging.info("network updates: %s" %
                     " ".join(map(str, [x[0] for x in network_updates])))
        logging.info("learning updates: %s" %
                     " ".join(map(str, [x[0] for x in learning_updates])))

        if False and config.data_transmitter:
            variables = [config.data_transmitter.get_iterator()]
            givens = config.data_transmitter.get_givens()
        else:
            variables = network.input_variables + network.target_variables
            givens = None

        self.learning_func = theano.function(
            variables,
            map(lambda v: theano.Out(v, borrow=True), self.training_variables),
            updates=update_list,
            allow_input_downcast=True,
            mode=self.config.get("theano_mode", None),
            givens=givens)
Example #12
def optimize_updates(params, gradients, config=None, shapes=None):
    """
    General optimization function for Theano.
    Parameters:
        params - parameters
        gradients - gradients
        config - training config
    Returns:
        Theano updates
    :type config: deepy.TrainerConfig or dict
    """
    if config and isinstance(config, dict):
        config = TrainerConfig(config)

    # Clipping
    if config:
        clip_value = config.get("gradient_clipping", None)

        if clip_value:
            clip_constant = T.constant(clip_value, dtype=FLOATX)

            if config.avoid_compute_embed_norm:
                grad_norm = multiple_l2_norm([t[1] for t in zip(params, gradients) if not t[0].name.startswith("W_embed")])
            else:
                grad_norm = multiple_l2_norm(gradients)
            isnan = T.or_(T.isnan(grad_norm), T.isinf(grad_norm))
            multiplier = ifelse(grad_norm < clip_constant,
                                T.constant(1., dtype=FLOATX), clip_constant / (grad_norm + EPSILON))

            # Clip
            clipped_gradients = []
            for param, g in zip(params, gradients):
                g = multiplier * g
                if config.avoid_nan:
                    g = T.switch(isnan, np.float32(0.1) * param, g)
                if config.gradient_tolerance:
                    g = ifelse(grad_norm > config.gradient_tolerance, T.zeros_like(g) + EPSILON, g)
                clipped_gradients.append(g)

            gradients = clipped_gradients
    # Regularization
    if config and config.weight_l2:
        regularized_gradients = []
        for param, grad in zip(params, gradients):
            grad = grad + (2 * config.weight_l2 * param)
            regularized_gradients.append(grad)
        gradients = regularized_gradients

    # Avoid nan but not computing the norm
    # This is not recommended
    if config and config.avoid_nan and not config.gradient_clipping:
        logging.info("avoid NaN gradients")
        new_gradients = []
        for grad in gradients:
            new_grad = ifelse(T.isnan(grad).any(), T.zeros_like(grad) + EPSILON, grad)
            new_gradients.append(new_grad)
        gradients = new_gradients


    # Find method
    method = "SGD"
    if config:
        method = config.get("method", method).upper()
    # Get Function
    func = None
    if method in ["SGD", "ADAGRAD", "ADADELTA", "FINETUNING_ADAGRAD"]:
        from cores.ada_family import ada_family_core
        func = ada_family_core
    elif method == "ADAM":
        from cores.adam import adam_core
        func = adam_core
    elif method == "RMSPROP":
        from cores.rmsprop import rmsprop_core
        func = rmsprop_core
    elif method == "MOMENTUM":
        from cores.momentum import momentum_core
        func = momentum_core

    if not func:
        raise NotImplementedError("method '%s' is not supported" % method)

    logging.info("optimize method=%s parameters=%s" % (method, str(params)))

    free_parameters = []
    return_vals = wrap_core(func, config, params, gradients)
    if type(return_vals) == list and type(return_vals[0]) == list:
        updates, free_parameters = return_vals
    else:
        updates = return_vals

    # No free param recording
    if config and not config.record_free_params:
        free_parameters = []

    # Weight bound
    if config and config.weight_bound:
        logging.info("apply weight bound of %.2f" % config.weight_bound)
        new_updates = []
        for param, update_value in updates:
            bounded_value = (update_value * (T.abs_(update_value) <= config.weight_bound) +
                             config.weight_bound * (update_value > config.weight_bound) +
                             -config.weight_bound * (update_value < -config.weight_bound))
            new_updates.append((param, bounded_value))
        updates = new_updates
    return updates, free_parameters
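
A rough usage sketch for optimize_updates, showing how the returned updates feed a compiled training function. The import path deepy.trainers.optimize and the config keys used here are assumptions for illustration:

import numpy as np
import theano
import theano.tensor as T

from deepy.trainers.optimize import optimize_updates  # assumed import path

# A toy linear model: parameters, a cost, and its gradients
x = T.matrix("x")
y = T.vector("y")
W = theano.shared(np.zeros((5, 1), dtype="float32"), name="W")
cost = T.mean((T.dot(x, W).flatten() - y) ** 2)
gradients = T.grad(cost, [W])

# config may be a dict or a TrainerConfig; the keys shown are illustrative
updates, free_params = optimize_updates(
    [W], gradients,
    config={"method": "ADAM", "learning_rate": 0.001, "gradient_clipping": 3})
train_fn = theano.function([x, y], cost, updates=updates, allow_input_downcast=True)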
Example #13
File: trainers.py Project: 52nlp/deepy
class NeuralTrainer(object):
    '''This is a base class for all trainers.'''

    def __init__(self, network, config=None):
        """
        Basic neural network trainer.
        :type network: deepy.NeuralNetwork
        :type config: deepy.conf.TrainerConfig
        :return:
        """
        super(NeuralTrainer, self).__init__()

        self.config = None
        if isinstance(config, TrainerConfig):
            self.config = config
        elif isinstance(config, dict):
            self.config = TrainerConfig(config)
        else:
            self.config = TrainerConfig()
        self.network = network

        self.network.prepare_training()

        self._setup_costs()

        logging.info("compile evaluation function")
        self.evaluation_func = theano.function(
            network.input_variables + network.target_variables, self.evaluation_variables, updates=network.updates,
            allow_input_downcast=True, mode=self.config.get("theano_mode", None))
        self.learning_func = None

        self.validation_frequency = self.config.validation_frequency
        self.min_improvement = self.config.min_improvement
        self.patience = self.config.patience

        self.best_cost = 1e100
        self.best_iter = 0
        self.best_params = self._copy_network_params()

    def _setup_costs(self):
        self.cost = self._add_regularization(self.network.cost)
        self.test_cost = self._add_regularization(self.network.test_cost)
        self.training_variables = [self.cost]
        self.training_names = ['J']
        for name, monitor in self.network.training_monitors:
            self.training_names.append(name)
            self.training_variables.append(monitor)
        logging.info("monitor list: %s" % ",".join(self.training_names))

        self.evaluation_variables = [self.test_cost]
        self.evaluation_names = ['J']
        for name, monitor in self.network.testing_monitors:
            self.evaluation_names.append(name)
            self.evaluation_variables.append(monitor)

    def _add_regularization(self, cost):
        if self.config.weight_l1 > 0:
            logging.info("L1 weight regularization: %f" % self.config.weight_l1)
            cost += self.config.weight_l1 * sum(abs(w).sum() for w in self.network.parameters)
        if self.config.hidden_l1 > 0:
            logging.info("L1 hidden unit regularization: %f" % self.config.hidden_l1)
            cost += self.config.hidden_l1 * sum(abs(h).mean(axis=0).sum() for h in self.network._hidden_outputs)
        if self.config.hidden_l2 > 0:
            logging.info("L2 hidden unit regularization: %f" % self.config.hidden_l2)
            cost += self.config.hidden_l2 * sum((h * h).mean(axis=0).sum() for h in self.network._hidden_outputs)

        return cost

    def set_params(self, targets, free_params=None):
        for param, target in zip(self.network.parameters, targets):
            param.set_value(target)
        if free_params:
            for param, param_value in zip(self.network.free_parameters, free_params):
                param.set_value(param_value)

    def save_params(self, path):
        self.set_params(*self.best_params)
        self.network.save_params(path)

    def load_params(self, path):
        self.network.load_params(path)
        self.best_params = self._copy_network_params()

    def _copy_network_params(self):
        checkpoint = (map(lambda p: p.get_value().copy(), self.network.parameters),
                      map(lambda p: p.get_value().copy(), self.network.free_parameters))
        return checkpoint


    def train(self, train_set, valid_set=None, test_set=None, train_size=None):
        """
        Train the model and return costs.
        """
        if not self.learning_func:
            raise NotImplementedError
        iteration = 0
        while True:
            # Test
            if not iteration % self.config.test_frequency and test_set:
                try:
                    self._run_test(iteration, test_set)
                except KeyboardInterrupt:
                    logging.info('interrupted!')
                    break
            # Validate
            if not iteration % self.validation_frequency and valid_set:
                try:

                    if not self._run_valid(iteration, valid_set):
                        logging.info('patience elapsed, bailing out')
                        break
                except KeyboardInterrupt:
                    logging.info('interrupted!')
                    break
            # Train one step
            try:
                costs = self._run_train(iteration, train_set, train_size)
            except KeyboardInterrupt:
                logging.info('interrupted!')
                break
            # Check costs
            if np.isnan(costs[0][1]):
                logging.info("NaN detected in costs, rollback to last parameters")
                self.set_params(*self.checkpoint)
            else:
                iteration += 1
                self.network.epoch_callback()

            yield dict(costs)

        if valid_set and self.config.get("save_best_parameters", True):
            self.set_params(*self.best_params)
        if test_set:
            self._run_test(-1, test_set)

    def _run_test(self, iteration, test_set):
        """
        Run on test iteration.
        """
        costs = self.test_step(test_set)
        info = ' '.join('%s=%.2f' % el for el in costs)
        message = "test    (iter=%i) %s" % (iteration + 1, info)
        logging.info(message)
        self.network.train_logger.record(message)

    def _run_train(self, iteration, train_set, train_size=None):
        """
        Run one training iteration.
        """
        costs = self.train_step(train_set, train_size)

        if not iteration % self.config.monitor_frequency:
            info = " ".join("%s=%.2f" % item for item in costs)
            message = "monitor (iter=%i) %s" % (iteration + 1, info)
            logging.info(message)
            self.network.train_logger.record(message)
        return costs

    def _run_valid(self, iteration, valid_set):
        """
        Run one valid iteration, return true if to continue training.
        """
        costs = self.valid_step(valid_set)
        # this is the same as: (J_i - J_f) / J_i > min improvement
        _, J = costs[0]
        if self.best_cost - J > self.best_cost * self.min_improvement:
            self.best_cost = J
            self.best_iter = iteration
            self.best_params = self._copy_network_params()
            marker = ' *'
        else:
            marker = ""
        info = ' '.join('%s=%.2f' % el for el in costs)
        message = "valid   (iter=%i) %s%s" % (iteration + 1, info, marker)
        logging.info(message)
        self.network.train_logger.record(message)
        self.checkpoint = self._copy_network_params()
        return iteration - self.best_iter < self.patience

    def test_step(self, test_set):
        costs = list(zip(
            self.evaluation_names,
            np.mean([self.evaluation_func(*x) for x in test_set], axis=0)))
        return costs

    def valid_step(self, valid_set):
        costs = list(zip(
            self.evaluation_names,
            np.mean([self.evaluation_func(*x) for x in valid_set], axis=0)))
        return costs

    def train_step(self, train_set, train_size=None):
        training_callback = bool(self.network.training_callbacks)
        cost_matrix = []
        c = 0

        for x in train_set:
            cost_x = self.learning_func(*x)
            cost_matrix.append(cost_x)
            if training_callback:
                self.network.training_callback()
            if train_size:
                c += 1
                sys.stdout.write("\r> %d%%" % (c * 100 / train_size))
                sys.stdout.flush()

        if train_size:
            sys.stdout.write("\r")
            sys.stdout.flush()
        costs = list(zip(self.training_names, np.mean(cost_matrix, axis=0)))
        return costs

    def run(self, train_set, valid_set=None, test_set=None, train_size=None, controllers=None):
        """
        Run until the end.
        """
        if isinstance(train_set, Dataset):
            dataset = train_set
            train_set = dataset.train_set()
            valid_set = dataset.valid_set()
            test_set = dataset.test_set()
            train_size = dataset.train_size()

        timer = Timer()
        for _ in self.train(train_set, valid_set=valid_set, test_set=test_set, train_size=train_size):
            if controllers:
                ending = False
                for controller in controllers:
                    if hasattr(controller, 'invoke') and controller.invoke():
                        ending = True
                if ending:
                    break
        timer.report()
        return
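
Two ways to drive the trainer above: run() consumes the whole loop, while train() is a generator that yields a dict of costs per iteration. A sketch, under the assumption that MiniBatches (seen in the other examples) implements the Dataset interface used by run():

trainer = SGDTrainer(network)
mnist = MiniBatches(MnistDataset(), batch_size=100)

# Style 1: run() extracts train/valid/test sets from the Dataset and loops until patience runs out
trainer.run(mnist)

# Style 2: drive the generator directly and stop on a custom condition
for costs in trainer.train(mnist.train_set(), valid_set=mnist.valid_set(),
                           train_size=mnist.train_size()):
    if costs["J"] < 0.01:   # 'J' is the first training monitor set up in _setup_costs
        break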
Example #14
        model.stack(
            HighwayLayerLRDiagDropoutBatchNorm(activation=activation,
                                               gate_bias=gate_bias,
                                               projection_dim=d,
                                               d_p_0=dropout_p_h_0,
                                               d_p_1=dropout_p_h_1,
                                               init=init,
                                               quasi_ortho_init=True))
    #model.stack(BatchNormalization(),Dropout(p=dropout_p_2), Dense(10, init=init))
    model.stack(Dropout(p=dropout_p_2), Dense(10, init=init))

    learning_rate_start = 3e-3
    #learning_rate_target = 3e-7
    #learning_rate_epochs = 100
    #learning_rate_decay  = (learning_rate_target / learning_rate_start) ** (1.0 / learning_rate_epochs)
    conf = TrainerConfig()
    conf.learning_rate = LearningRateAnnealer.learning_rate(
        learning_rate_start)
    #conf.gradient_clipping = 1
    conf.patience = 20
    #conf.gradient_tolerance = 5
    conf.avoid_nan = True
    conf.min_improvement = 1e-10

    #trainer = MomentumTrainer(model)
    trainer = AdamTrainer(model, conf)

    mnist = MiniBatches(MnistDataset(), batch_size=100)
    #mnist = MiniBatches(MnistDatasetSmallValid(), batch_size=100)

    #trainer.run(mnist, controllers=[IncrementalLearningRateAnnealer(trainer, 0, learning_rate_decay)])
Example #15
File: base.py Project: bluetit/deepy
class NeuralTrainer(object):
    """
    A base class for all trainers.
    """
    __metaclass__ = ABCMeta

    def __init__(self, network, config=None):
        """
        Basic neural network trainer.
        :type network: deepy.NeuralNetwork
        :type config: deepy.conf.TrainerConfig
        :return:
        """
        super(NeuralTrainer, self).__init__()

        self.config = None
        if isinstance(config, TrainerConfig):
            self.config = config
        elif isinstance(config, dict):
            self.config = TrainerConfig(config)
        else:
            self.config = TrainerConfig()
        # Model and network all refer to the computational graph
        self.model = self.network = network

        self.network.prepare_training()
        self._setup_costs()

        self.evaluation_func = None

        self.validation_frequency = self.config.validation_frequency
        self.min_improvement = self.config.min_improvement
        self.patience = self.config.patience
        self._iter_callbacks = []

        self.best_cost = 1e100
        self.best_iter = 0
        self.best_params = self.copy_params()
        self._skip_batches = 0
        self._progress = 0
        self.last_cost = 0
        self.last_run_costs = None
        self._report_time = True

    def _compile_evaluation_func(self):
        if not self.evaluation_func:
            logging.info("compile evaluation function")
            self.evaluation_func = theano.function(
                self.network.input_variables + self.network.target_variables,
                self.evaluation_variables,
                updates=self.network.updates,
                allow_input_downcast=True, mode=self.config.get("theano_mode", None))

    def skip(self, n_batches):
        """
        Skip N batches in the training.
        """
        logging.info("Skip %d batches" % n_batches)
        self._skip_batches = n_batches

    def _setup_costs(self):
        self.cost = self._add_regularization(self.network.cost)
        self.test_cost = self._add_regularization(self.network.test_cost)
        self.training_variables = [self.cost]
        self.training_names = ['J']
        for name, monitor in self.network.training_monitors:
            self.training_names.append(name)
            self.training_variables.append(monitor)
        logging.info("monitor list: %s" % ",".join(self.training_names))

        self.evaluation_variables = [self.test_cost]
        self.evaluation_names = ['J']
        for name, monitor in self.network.testing_monitors:
            self.evaluation_names.append(name)
            self.evaluation_variables.append(monitor)

    def _add_regularization(self, cost):
        if self.config.weight_l1 > 0:
            logging.info("L1 weight regularization: %f" % self.config.weight_l1)
            cost += self.config.weight_l1 * sum(abs(w).sum() for w in self.network.parameters)
        if self.config.hidden_l1 > 0:
            logging.info("L1 hidden unit regularization: %f" % self.config.hidden_l1)
            cost += self.config.hidden_l1 * sum(abs(h).mean(axis=0).sum() for h in self.network._hidden_outputs)
        if self.config.hidden_l2 > 0:
            logging.info("L2 hidden unit regularization: %f" % self.config.hidden_l2)
            cost += self.config.hidden_l2 * sum((h * h).mean(axis=0).sum() for h in self.network._hidden_outputs)

        return cost

    def set_params(self, targets, free_params=None):
        for param, target in zip(self.network.parameters, targets):
            param.set_value(target)
        if free_params:
            for param, param_value in zip(self.network.free_parameters, free_params):
                param.set_value(param_value)

    def save_params(self, path):
        self.set_params(*self.best_params)
        self.network.save_params(path)

    def load_params(self, path, exclude_free_params=False):
        """
        Load parameters for the training.
        This method can load free parameters and resume the training progress.
        """
        self.network.load_params(path, exclude_free_params=exclude_free_params)
        self.best_params = self.copy_params()
        # Resume the progress
        if self.network.train_logger.progress() > 0:
            self.skip(self.network.train_logger.progress())

    def copy_params(self):
        checkpoint = (map(lambda p: p.get_value().copy(), self.network.parameters),
                      map(lambda p: p.get_value().copy(), self.network.free_parameters))
        return checkpoint

    def add_iter_callback(self, func):
        """
        Add a iteration callback function (receives an argument of the trainer).
        :return:
        """
        self._iter_callbacks.append(func)

    def train(self, train_set, valid_set=None, test_set=None, train_size=None):
        """
        Train the model and return costs.
        """
        epoch = 0
        while True:
            # Test
            if not epoch % self.config.test_frequency and test_set:
                try:
                    self._run_test(epoch, test_set)
                except KeyboardInterrupt:
                    logging.info('interrupted!')
                    break
            # Validate
            if not epoch % self.validation_frequency and valid_set:
                try:

                    if not self._run_valid(epoch, valid_set):
                        logging.info('patience elapsed, bailing out')
                        break
                except KeyboardInterrupt:
                    logging.info('interrupted!')
                    break
            # Train one step
            try:
                costs = self._run_train(epoch, train_set, train_size)
            except KeyboardInterrupt:
                logging.info('interrupted!')
                break
            # Check costs
            if np.isnan(costs[0][1]):
                logging.info("NaN detected in costs, rollback to last parameters")
                self.set_params(*self.checkpoint)
            else:
                epoch += 1
                self.network.epoch_callback()

            yield dict(costs)

        if valid_set and self.config.get("save_best_parameters", True):
            self.set_params(*self.best_params)
        if test_set:
            self._run_test(-1, test_set)

    @abstractmethod
    def learn(self, *variables):
        """
        Update the parameters and return the cost with given data points.
        :param variables:
        :return:
        """

    def _run_test(self, iteration, test_set):
        """
        Run on test iteration.
        """
        costs = self.test_step(test_set)
        info = ' '.join('%s=%.2f' % el for el in costs)
        message = "test    (epoch=%i) %s" % (iteration + 1, info)
        logging.info(message)
        self.network.train_logger.record(message)
        self.last_run_costs = costs

    def _run_train(self, iteration, train_set, train_size=None):
        """
        Run one training iteration.
        """
        costs = self.train_step(train_set, train_size)
        if not iteration % self.config.monitor_frequency:
            info = " ".join("%s=%.2f" % item for item in costs)
            message = "monitor (epoch=%i) %s" % (iteration + 1, info)
            logging.info(message)
            self.network.train_logger.record(message)
        self.last_run_costs = costs
        return costs

    def _run_valid(self, iteration, valid_set, dry_run=False):
        """
        Run one valid iteration, return true if to continue training.
        """
        costs = self.valid_step(valid_set)
        # this is the same as: (J_i - J_f) / J_i > min improvement
        _, J = costs[0]
        marker = ""
        if self.best_cost - J > self.best_cost * self.min_improvement:
            # save the best cost and parameters
            self.best_params = self.copy_params()
            marker = ' *'
            if not dry_run:
                self.best_cost = J
                self.best_iter = iteration

            if self.config.auto_save:
                self.network.train_logger.record_progress(self._progress)
                self.network.save_params(self.config.auto_save, new_thread=True)

        info = ' '.join('%s=%.2f' % el for el in costs)
        epoch = "epoch=%d" % (iteration + 1)
        if dry_run:
            epoch = "dryrun" + " " * (len(epoch) - 6)
        message = "valid   (%s) %s%s" % (epoch, info, marker)
        logging.info(message)
        self.last_run_costs = costs
        self.network.train_logger.record(message)
        self.checkpoint = self.copy_params()
        return iteration - self.best_iter < self.patience

    def test_step(self, test_set):
        self._compile_evaluation_func()
        costs = list(zip(
            self.evaluation_names,
            np.mean([self.evaluation_func(*x) for x in test_set], axis=0)))
        return costs

    def valid_step(self, valid_set):
        self._compile_evaluation_func()
        costs = list(zip(
            self.evaluation_names,
            np.mean([self.evaluation_func(*x) for x in valid_set], axis=0)))
        return costs

    def train_step(self, train_set, train_size=None):
        dirty_trick_times = 0
        network_callback = bool(self.network.training_callbacks)
        trainer_callback = bool(self._iter_callbacks)
        cost_matrix = []
        self._progress = 0

        for x in train_set:
            if self._skip_batches == 0:

                if dirty_trick_times > 0:
                    cost_x = self.learn(*[t[:(t.shape[0]/2)] for t in x])
                    cost_matrix.append(cost_x)
                    cost_x = self.learn(*[t[(t.shape[0]/2):] for t in x])
                    dirty_trick_times -= 1
                else:
                    try:
                        cost_x = self.learn(*x)
                    except MemoryError:
                        logging.info("Memory error was detected, perform dirty trick 30 times")
                        dirty_trick_times = 30
                        # Dirty trick
                        cost_x = self.learn(*[t[:(t.shape[0]/2)] for t in x])
                        cost_matrix.append(cost_x)
                        cost_x = self.learn(*[t[(t.shape[0]/2):] for t in x])
                cost_matrix.append(cost_x)
                self.last_cost = cost_x[0]
                if network_callback:
                    self.network.training_callback()
                if trainer_callback:
                    for func in self._iter_callbacks:
                        func(self)
            else:
                self._skip_batches -= 1
            if train_size:
                self._progress += 1
                sys.stdout.write("\x1b[2K\r> %d%% | J=%.2f" % (self._progress * 100 / train_size, self.last_cost))
                sys.stdout.flush()
        self._progress = 0

        if train_size:
            sys.stdout.write("\r")
            sys.stdout.flush()
        costs = list(zip(self.training_names, np.mean(cost_matrix, axis=0)))
        return costs

    def run(self, train_set, valid_set=None, test_set=None, train_size=None, controllers=None):
        """
        Run until the end.
        """
        if isinstance(train_set, Dataset):
            dataset = train_set
            train_set = dataset.train_set()
            valid_set = dataset.valid_set()
            test_set = dataset.test_set()
            train_size = dataset.train_size()

        timer = Timer()
        for _ in self.train(train_set, valid_set=valid_set, test_set=test_set, train_size=train_size):
            if controllers:
                ending = False
                for controller in controllers:
                    if hasattr(controller, 'invoke') and controller.invoke():
                        ending = True
                if ending:
                    break
        if self._report_time:
            timer.report()
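
In this version NeuralTrainer has become an abstract base: train_step() calls self.learn(), which subclasses must implement. A minimal sketch of a concrete subclass that compiles a plain SGD step to satisfy learn() (the update rule and names are illustrative, not deepy's actual optimizer wiring):

import theano
import theano.tensor as T


class SimpleSGDTrainer(NeuralTrainer):
    """Toy subclass that satisfies the abstract learn() with one SGD update."""

    def __init__(self, network, config=None):
        super(SimpleSGDTrainer, self).__init__(network, config)
        lr = self.config.learning_rate
        sgd_updates = [(p, p - lr * g)
                       for p, g in zip(self.network.parameters,
                                       T.grad(self.cost, self.network.parameters))]
        self._sgd_func = theano.function(
            self.network.input_variables + self.network.target_variables,
            self.training_variables,
            updates=list(self.network.updates) + sgd_updates,
            allow_input_downcast=True)

    def learn(self, *variables):
        return self._sgd_func(*variables)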
Example #16
if __name__ == '__main__':

    ap = ArgumentParser()
    ap.add_argument("--model", default=os.path.join(os.path.dirname(__file__), "models", "sequence_adding_100_2.gz"))
    args = ap.parse_args()

    model = NeuralRegressor(input_dim=2, input_tensor=3)
    model.stack(IRNN(hidden_size=100, input_type="sequence",
                     output_type="one"),
                      Dense(1))

    if os.path.exists(args.model):
        model.load_params(args.model)

    conf = TrainerConfig()
    conf.learning_rate = LearningRateAnnealer.learning_rate(0.01)
    conf.gradient_clipping = 3
    conf.patience = 50
    conf.gradient_tolerance = 5
    conf.avoid_nan = False
    trainer = SGDTrainer(model, conf)

    annealer = LearningRateAnnealer(patience=20)

    trainer.run(batch_set, controllers=[annealer])

    model.save_params(args.model)
    print "Identity matrix weight:"
    print model.first_layer().W_h.get_value().diagonal()
Example #17
class NeuralTrainer(object):
    """
    A base class for all trainers.
    """
    __metaclass__ = ABCMeta

    def __init__(self, network, config=None):
        """
        Basic neural network trainer.
        :type network: deepy.NeuralNetwork
        :type config: deepy.conf.TrainerConfig
        :return:
        """
        super(NeuralTrainer, self).__init__()

        self.config = None
        if isinstance(config, TrainerConfig):
            self.config = config
        elif isinstance(config, dict):
            self.config = TrainerConfig(config)
        else:
            self.config = TrainerConfig()
        # Model and network all refer to the computational graph
        self.model = self.network = network

        self.network.prepare_training()
        self._setup_costs()

        self.evaluation_func = None

        self.validation_frequency = self.config.validation_frequency
        self.min_improvement = self.config.min_improvement
        self.patience = self.config.patience
        self._iter_callbacks = []

        self.best_cost = 1e100
        self.best_iter = 0
        self.best_params = self.copy_params()
        self._skip_batches = 0
        self._progress = 0
        self.last_cost = 0

    def _compile_evaluation_func(self):
        if not self.evaluation_func:
            logging.info("compile evaluation function")
            self.evaluation_func = theano.function(
                self.network.input_variables + self.network.target_variables,
                self.evaluation_variables,
                updates=self.network.updates,
                allow_input_downcast=True,
                mode=self.config.get("theano_mode", None))

    def skip(self, n_batches):
        """
        Skip N batches in the training.
        """
        logging.info("Skip %d batches" % n_batches)
        self._skip_batches = n_batches

    def _setup_costs(self):
        self.cost = self._add_regularization(self.network.cost)
        self.test_cost = self._add_regularization(self.network.test_cost)
        self.training_variables = [self.cost]
        self.training_names = ['J']
        for name, monitor in self.network.training_monitors:
            self.training_names.append(name)
            self.training_variables.append(monitor)
        logging.info("monitor list: %s" % ",".join(self.training_names))

        self.evaluation_variables = [self.test_cost]
        self.evaluation_names = ['J']
        for name, monitor in self.network.testing_monitors:
            self.evaluation_names.append(name)
            self.evaluation_variables.append(monitor)

    def _add_regularization(self, cost):
        if self.config.weight_l1 > 0:
            logging.info("L1 weight regularization: %f" %
                         self.config.weight_l1)
            cost += self.config.weight_l1 * sum(
                abs(w).sum() for w in self.network.parameters)
        if self.config.hidden_l1 > 0:
            logging.info("L1 hidden unit regularization: %f" %
                         self.config.hidden_l1)
            cost += self.config.hidden_l1 * sum(
                abs(h).mean(axis=0).sum()
                for h in self.network._hidden_outputs)
        if self.config.hidden_l2 > 0:
            logging.info("L2 hidden unit regularization: %f" %
                         self.config.hidden_l2)
            cost += self.config.hidden_l2 * sum(
                (h * h).mean(axis=0).sum()
                for h in self.network._hidden_outputs)

        return cost

    def set_params(self, targets, free_params=None):
        for param, target in zip(self.network.parameters, targets):
            param.set_value(target)
        if free_params:
            for param, param_value in zip(self.network.free_parameters,
                                          free_params):
                param.set_value(param_value)

    def save_params(self, path):
        self.set_params(*self.best_params)
        self.network.save_params(path)

    def load_params(self, path, exclude_free_params=False):
        """
        Load parameters for the training.
        This method can load free parameters and resume the training progress.
        """
        self.network.load_params(path, exclude_free_params=exclude_free_params)
        self.best_params = self.copy_params()
        # Resume the progress
        if self.network.train_logger.progress() > 0:
            self.skip(self.network.train_logger.progress())

    def copy_params(self):
        checkpoint = (map(lambda p: p.get_value().copy(),
                          self.network.parameters),
                      map(lambda p: p.get_value().copy(),
                          self.network.free_parameters))
        return checkpoint

    def add_iter_callback(self, func):
        """
        Add a iteration callback function (receives an argument of the trainer).
        :return:
        """
        self._iter_callbacks.append(func)

    def train(self, train_set, valid_set=None, test_set=None, train_size=None):
        """
        Train the model and return costs.
        """
        iteration = 0
        while True:
            # Test
            if not iteration % self.config.test_frequency and test_set:
                try:
                    self._run_test(iteration, test_set)
                except KeyboardInterrupt:
                    logging.info('interrupted!')
                    break
            # Validate
            if not iteration % self.validation_frequency and valid_set:
                try:

                    if not self._run_valid(iteration, valid_set):
                        logging.info('patience elapsed, bailing out')
                        break
                except KeyboardInterrupt:
                    logging.info('interrupted!')
                    break
            # Train one step
            try:
                costs = self._run_train(iteration, train_set, train_size)
            except KeyboardInterrupt:
                logging.info('interrupted!')
                break
            # Check costs
            if np.isnan(costs[0][1]):
                logging.info(
                    "NaN detected in costs, rollback to last parameters")
                self.set_params(*self.checkpoint)
            else:
                iteration += 1
                self.network.epoch_callback()

            yield dict(costs)

        if valid_set and self.config.get("save_best_parameters", True):
            self.set_params(*self.best_params)
        if test_set:
            self._run_test(-1, test_set)

    @abstractmethod
    def learn(self, *variables):
        """
        Update the parameters and return the cost with given data points.
        :param variables:
        :return:
        """

    def _run_test(self, iteration, test_set):
        """
        Run on test iteration.
        """
        costs = self.test_step(test_set)
        info = ' '.join('%s=%.2f' % el for el in costs)
        message = "test    (iter=%i) %s" % (iteration + 1, info)
        logging.info(message)
        self.network.train_logger.record(message)

    def _run_train(self, iteration, train_set, train_size=None):
        """
        Run one training iteration.
        """
        costs = self.train_step(train_set, train_size)

        if not iteration % self.config.monitor_frequency:
            info = " ".join("%s=%.2f" % item for item in costs)
            message = "monitor (iter=%i) %s" % (iteration + 1, info)
            logging.info(message)
            self.network.train_logger.record(message)
        return costs

    def _run_valid(self, iteration, valid_set, dry_run=False):
        """
        Run one valid iteration, return true if to continue training.
        """
        costs = self.valid_step(valid_set)
        # this is the same as: (J_i - J_f) / J_i > min improvement
        _, J = costs[0]
        marker = ""
        if self.best_cost - J > self.best_cost * self.min_improvement:
            # save the best cost and parameters
            self.best_params = self.copy_params()
            marker = ' *'
            if not dry_run:
                self.best_cost = J
                self.best_iter = iteration

            if self.config.auto_save:
                self.network.train_logger.record_progress(self._progress)
                self.network.save_params(self.config.auto_save,
                                         new_thread=True)

        info = ' '.join('%s=%.2f' % el for el in costs)
        iter_str = "iter=%d" % (iteration + 1)
        if dry_run:
            iter_str = "dryrun" + " " * (len(iter_str) - 6)
        message = "valid   (%s) %s%s" % (iter_str, info, marker)
        logging.info(message)
        self.network.train_logger.record(message)
        self.checkpoint = self.copy_params()
        return iteration - self.best_iter < self.patience

    def test_step(self, test_set):
        self._compile_evaluation_func()
        costs = list(
            zip(self.evaluation_names,
                np.mean([self.evaluation_func(*x) for x in test_set], axis=0)))
        return costs

    def valid_step(self, valid_set):
        self._compile_evaluation_func()
        costs = list(
            zip(self.evaluation_names,
                np.mean([self.evaluation_func(*x) for x in valid_set],
                        axis=0)))
        return costs

    def train_step(self, train_set, train_size=None):
        dirty_trick_times = 0
        network_callback = bool(self.network.training_callbacks)
        trainer_callback = bool(self._iter_callbacks)
        cost_matrix = []
        self._progress = 0

        for x in train_set:
            if self._skip_batches == 0:
                if dirty_trick_times > 0:
                    cost_x = self.learn(*[t[:(t.shape[0] / 2)] for t in x])
                    cost_matrix.append(cost_x)
                    cost_x = self.learn(*[t[(t.shape[0] / 2):] for t in x])
                    dirty_trick_times -= 1
                else:
                    try:
                        cost_x = self.learn(*x)
                    except MemoryError:
                        logging.info(
                            "Memory error was detected, perform dirty trick 30 times"
                        )
                        dirty_trick_times = 30
                        # Dirty trick
                        cost_x = self.learn(*[t[:(t.shape[0] / 2)] for t in x])
                        cost_matrix.append(cost_x)
                        cost_x = self.learn(*[t[(t.shape[0] / 2):] for t in x])
                cost_matrix.append(cost_x)
                self.last_cost = cost_x[0]
                if network_callback:
                    self.network.training_callback()
                if trainer_callback:
                    for func in self._iter_callbacks:
                        func(self)
            else:
                self._skip_batches -= 1
            if train_size:
                self._progress += 1
                sys.stdout.write(
                    "\x1b[2K\r> %d%% | J=%.2f" %
                    (self._progress * 100 / train_size, self.last_cost))
                sys.stdout.flush()
        self._progress = 0

        if train_size:
            sys.stdout.write("\r")
            sys.stdout.flush()
        costs = list(zip(self.training_names, np.mean(cost_matrix, axis=0)))
        return costs

    def run(self,
            train_set,
            valid_set=None,
            test_set=None,
            train_size=None,
            controllers=None):
        """
        Run until the end.
        """
        if isinstance(train_set, Dataset):
            dataset = train_set
            train_set = dataset.train_set()
            valid_set = dataset.valid_set()
            test_set = dataset.test_set()
            train_size = dataset.train_size()

        timer = Timer()
        for _ in self.train(train_set,
                            valid_set=valid_set,
                            test_set=test_set,
                            train_size=train_size):
            if controllers:
                ending = False
                for controller in controllers:
                    if hasattr(controller, 'invoke') and controller.invoke():
                        ending = True
                if ending:
                    break
        timer.report()
        return
Example #18
    model = L2HingeNeuralClassifier(input_dim=28*28, last_layer_l2_regularization = l2_reg)
    model.stack(Dropout(p=dropout_p_0), Dense(n, init=init, disable_bias=True), BatchNormalization(), Activation(activation))
    #model.stack(Dropout(p=dropout_p_0), BatchNormalization())

    for _ in range(T):
        #model.stack(HighwayLayerLRDropoutBatchNorm(activation=activation, gate_bias=gate_bias, projection_dim=d, d_p_0 = dropout_p_h_0, d_p_1 = dropout_p_h_1, init=init))
        model.stack(HighwayLayerLRDiagDropoutBatchNorm(activation=activation, gate_bias=gate_bias, projection_dim=d, d_p_0 = dropout_p_h_0, d_p_1 = dropout_p_h_1, init=init, quasi_ortho_init=True))
    #model.stack(BatchNormalization(),Dropout(p=dropout_p_2), Dense(10, init=init))
    model.stack(Dropout(p=dropout_p_2), Dense(10, init=init))

    
    learning_rate_start  = 3e-3
    #learning_rate_target = 3e-7
    #learning_rate_epochs = 100
    #learning_rate_decay  = (learning_rate_target / learning_rate_start) ** (1.0 / learning_rate_epochs)
    conf = TrainerConfig()
    conf.learning_rate = LearningRateAnnealer.learning_rate(learning_rate_start)
    #conf.gradient_clipping = 1
    conf.patience = 20
    #conf.gradient_tolerance = 5
    conf.avoid_nan = True
    conf.min_improvement = 1e-10

    #trainer = MomentumTrainer(model)
    trainer = AdamTrainer(model, conf)

    mnist = MiniBatches(MnistDataset(), batch_size=100)
    #mnist = MiniBatches(MnistDatasetSmallValid(), batch_size=100)

    #trainer.run(mnist, controllers=[IncrementalLearningRateAnnealer(trainer, 0, learning_rate_decay)])
    trainer.run(mnist, controllers=[LearningRateAnnealer(trainer, 3, 14)])
Example #19
File: rnn.py Project: 52nlp/deepy
import logging
import numpy as np

from deepy.layers.recurrent import RecurrentLayer, RecurrentNetwork
from deepy.conf import NetworkConfig, TrainerConfig
from deepy.utils.functions import FLOATX
from deepy import SGDTrainer


logging.basicConfig(level=logging.INFO)

if __name__ == '__main__':
    net_conf = NetworkConfig(input_size=6)
    net_conf.layers = [RecurrentLayer(size=10, activation='sigmoid', bptt=True)]

    trainer_conf = TrainerConfig()
    trainer_conf.learning_rate = 0.03
    trainer_conf.weight_l2 = 0.0001
    trainer_conf.hidden_l2 = 0.0001
    trainer_conf.monitor_frequency = trainer_conf.validation_frequency = trainer_conf.test_frequency = 1

    network = RecurrentNetwork(net_conf)
    trainer = SGDTrainer(network)

    data = np.array([[1,0,0,0,0,0],
                     [0,1,0,0,0,0],
                     [0,0,1,0,0,0],
                     [0,0,0,1,0,0],
                     [0,0,0,0,1,0],
                     [0,0,0,0,0,1],
                     [0,1,0,0,0,0],
Example #20
batch_set = MiniBatches(dataset, batch_size=32)

if __name__ == '__main__':

    ap = ArgumentParser()
    ap.add_argument("--model", default=os.path.join(os.path.dirname(__file__), "models", "sequence_adding_100_2.gz"))
    args = ap.parse_args()

    model = NeuralRegressor(input_dim=2, input_tensor=3, clip_value=3.)
    model.stack_layer(IRNN(hidden_size=100, output_size=1, input_type="sequence",
                     output_type="one", output_activation="linear"))

    if os.path.exists(args.model):
        model.load_params(args.model)

    conf = TrainerConfig()
    conf.learning_rate = LearningRateAnnealer.learning_rate(0.01)
    conf.max_norm = 1
    conf.patience = 50
    conf.gradient_tolerance = 5
    trainer = SGDTrainer(model, conf)

    annealer = LearningRateAnnealer(trainer, patience=20)

    trainer.run(batch_set, controllers=[annealer])

    model.save_params(args.model)
    print "Identity matrix weight:"
    print model.first_layer().W_h.get_value().diagonal()