def __init__(self, network, config=None):
    """
    Basic neural network trainer.
    :type network: deepy.NeuralNetwork
    :type config: deepy.conf.TrainerConfig
    :return:
    """
    super(NeuralTrainer, self).__init__()

    self.config = None
    if isinstance(config, TrainerConfig):
        self.config = config
    elif isinstance(config, dict):
        self.config = TrainerConfig(config)
    else:
        self.config = TrainerConfig()
    # Model and network all refer to the computational graph
    self.model = self.network = network

    self.network.prepare_training()
    self._setup_costs()

    self.evaluation_func = None

    self.validation_frequency = self.config.validation_frequency
    self.min_improvement = self.config.min_improvement
    self.patience = self.config.patience
    self._iter_callbacks = []

    self.best_cost = 1e100
    self.best_iter = 0
    self.best_params = self.copy_params()
    self._skip_batches = 0
    self._progress = 0
    self.last_cost = 0

def __init__(self, network, config=None, method=None):
    if method:
        logging.info("changing optimization method to '%s'" % method)
        if not config:
            config = TrainerConfig()
        elif isinstance(config, dict):
            config = TrainerConfig(config)
        config.method = method

    super(GeneralNeuralTrainer, self).__init__(network, config)

    logging.info('compiling %s learning function', self.__class__.__name__)

    network_updates = list(network.updates) + list(network.training_updates)
    learning_updates = list(self.learning_updates())
    update_list = network_updates + learning_updates
    logging.info("network updates: %s" % " ".join(map(str, [x[0] for x in network_updates])))
    logging.info("learning updates: %s" % " ".join(map(str, [x[0] for x in learning_updates])))

    self.learning_func = theano.function(
        network.input_variables + network.target_variables,
        map(lambda v: theano.Out(v, borrow=True), self.training_variables),
        updates=update_list, allow_input_downcast=True,
        mode=self.config.get("theano_mode", None))

def __init__(self, network, config=None):
    """
    Basic neural network trainer.
    :type network: deepy.NeuralNetwork
    :type config: deepy.conf.TrainerConfig
    :return:
    """
    super(NeuralTrainer, self).__init__()

    self.config = None
    if isinstance(config, TrainerConfig):
        self.config = config
    elif isinstance(config, dict):
        self.config = TrainerConfig(config)
    else:
        self.config = TrainerConfig()

    self.network = network

    self.network.prepare_training()
    self._setup_costs()

    logging.info("compile evaluation function")
    self.evaluation_func = theano.function(
        network.input_variables + network.target_variables,
        self.evaluation_variables,
        updates=network.updates, allow_input_downcast=True,
        mode=self.config.get("theano_mode", None))
    self.learning_func = None

    self.validation_frequency = self.config.validation_frequency
    self.min_improvement = self.config.min_improvement
    self.patience = self.config.patience

    self.best_cost = 1e100
    self.best_iter = 0
    self.best_params = self._copy_network_params()

def __init__(self, network, method=None, config=None, annealer=None, validator=None):
    if method:
        logging.info("changing optimization method to '%s'" % method)
        if not config:
            config = TrainerConfig()
        elif isinstance(config, dict):
            config = TrainerConfig(config)
        config.method = method

    super(GeneralNeuralTrainer, self).__init__(network, config, annealer=annealer, validator=validator)

    self._learning_func = None

def __init__(self, network, config=None, method=None):
    if method:
        logging.info("changing optimization method to '%s'" % method)
        if not config:
            config = TrainerConfig()
        elif isinstance(config, dict):
            config = TrainerConfig(config)
        config.method = method

    super(GeneralNeuralTrainer, self).__init__(network, config)

    self._learning_func = None

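# Usage sketch (an assumption, not part of the original source): the `method` argument above
# lets one trainer class cover several optimizers. `model` and `mnist` stand for the network
# and mini-batch set built in the MNIST experiment scripts further below; GeneralNeuralTrainer
# is the class whose __init__ is shown above.
trainer = GeneralNeuralTrainer(model, config={"learning_rate": 0.001}, method="RMSPROP")
trainer.run(mnist)
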
def __init__(self, state_num, action_num, experience_replay=True):
    self.state_num = state_num
    self.action_num = action_num
    self.experience_replay = experience_replay
    self.experience_pool = []

    self.model = get_model(state_num, action_num)
    train_conf = TrainerConfig()
    train_conf.learning_rate = LEARNING_RATE
    train_conf.weight_l2 = 0
    self.trainer = SGDTrainer(self.model, train_conf)
    self.trainer.training_names = []
    self.trainer.training_variables = []

    self.thread_lock = threading.Lock()
    self.epsilon = EPSILON
    self.tick = 0

def __init__(self, network, config=None):
    """
    Basic neural network trainer.
    :type network: deepy.NeuralNetwork
    :type config: deepy.conf.TrainerConfig
    :return:
    """
    super(NeuralTrainer, self).__init__()

    self.config = None
    if isinstance(config, TrainerConfig):
        self.config = config
    elif isinstance(config, dict):
        self.config = TrainerConfig(config)
    else:
        self.config = TrainerConfig()

    self.network = network

    self.network.prepare_training()
    self._setup_costs()

    logging.info("compile evaluation function")
    self.evaluation_func = theano.function(
        network.input_variables + network.target_variables,
        self.evaluation_variables,
        updates=network.updates, allow_input_downcast=True,
        mode=self.config.get("theano_mode", None))
    self.learning_func = None

    self.validation_frequency = self.config.validation_frequency
    self.min_improvement = self.config.min_improvement
    self.patience = self.config.patience

    self.best_cost = 1e100
    self.best_iter = 0
    self.best_params = self._copy_network_params()
    self._skip_batches = 0
    self._progress = 0

def __init__(self, network, config=None, method=None):
    if method:
        logging.info("changing optimization method to '%s'" % method)
        if not config:
            config = TrainerConfig()
        config.method = method

    super(GeneralNeuralTrainer, self).__init__(network, config)

    logging.info('compiling %s learning function', self.__class__.__name__)

    network_updates = list(network.updates) + list(network.training_updates)
    learning_updates = list(self.learning_updates())
    update_list = network_updates + learning_updates
    logging.info("network updates: %s" % " ".join(map(str, [x[0] for x in network_updates])))
    logging.info("learning updates: %s" % " ".join(map(str, [x[0] for x in learning_updates])))

    self.learning_func = theano.function(
        network.input_variables + network.target_variables,
        self.training_variables,
        updates=update_list, allow_input_downcast=True,
        mode=config.get("theano_mode", theano.Mode(linker=THEANO_LINKER)))

def __init__(self, network, config=None, method=None):
    if method:
        logging.info("changing optimization method to '%s'" % method)
        if not config:
            config = TrainerConfig()
        elif isinstance(config, dict):
            config = TrainerConfig(config)
        config.method = method

    super(GeneralNeuralTrainer, self).__init__(network, config)

    logging.info('compiling %s learning function', self.__class__.__name__)

    network_updates = list(network.updates) + list(network.training_updates)
    learning_updates = list(self.learning_updates())
    update_list = network_updates + learning_updates

    logging.info("network updates: %s" % " ".join(map(str, [x[0] for x in network_updates])))
    logging.info("learning updates: %s" % " ".join(map(str, [x[0] for x in learning_updates])))

    if False and config.data_transmitter:
        variables = [config.data_transmitter.get_iterator()]
        givens = config.data_transmitter.get_givens()
    else:
        variables = network.input_variables + network.target_variables
        givens = None

    self.learning_func = theano.function(
        variables,
        map(lambda v: theano.Out(v, borrow=True), self.training_variables),
        updates=update_list,
        allow_input_downcast=True,
        mode=self.config.get("theano_mode", None),
        givens=givens)

def optimize_updates(params, gradients, config=None, shapes=None):
    """
    General optimization function for Theano.
    Parameters:
        params - parameters
        gradients - gradients
        config - training config
    Returns:
        Theano updates
    :type config: deepy.TrainerConfig or dict
    """
    if config and isinstance(config, dict):
        config = TrainerConfig(config)

    # Clipping
    if config:
        clip_value = config.get("gradient_clipping", None)
        if clip_value:
            clip_constant = T.constant(clip_value, dtype=FLOATX)
            if config.avoid_compute_embed_norm:
                grad_norm = multiple_l2_norm([t[1] for t in zip(params, gradients) if not t[0].name.startswith("W_embed")])
            else:
                grad_norm = multiple_l2_norm(gradients)
            isnan = T.or_(T.isnan(grad_norm), T.isinf(grad_norm))
            multiplier = ifelse(grad_norm < clip_constant,
                                T.constant(1., dtype=FLOATX), clip_constant / (grad_norm + EPSILON))

            # Clip
            clipped_gradients = []
            for param, g in zip(params, gradients):
                g = multiplier * g
                if config.avoid_nan:
                    g = T.switch(isnan, np.float32(0.1) * param, g)
                if config.gradient_tolerance:
                    g = ifelse(grad_norm > config.gradient_tolerance, T.zeros_like(g) + EPSILON, g)
                clipped_gradients.append(g)

            gradients = clipped_gradients

    # Regularization
    if config and config.weight_l2:
        regularized_gradients = []
        for param, grad in zip(params, gradients):
            grad = grad + (2 * config.weight_l2 * param)
            regularized_gradients.append(grad)
        gradients = regularized_gradients

    # Avoid nan but not computing the norm
    # This is not recommended
    if config and config.avoid_nan and not config.gradient_clipping:
        logging.info("avoid NaN gradients")
        new_gradients = []
        for grad in gradients:
            new_grad = ifelse(T.isnan(grad).any(), T.zeros_like(grad) + EPSILON, grad)
            new_gradients.append(new_grad)
        gradients = new_gradients

    # Find method
    method = "SGD"
    if config:
        method = config.get("method", method).upper()
    # Get Function
    func = None
    if method in ["SGD", "ADAGRAD", "ADADELTA", "FINETUNING_ADAGRAD"]:
        from cores.ada_family import ada_family_core
        func = ada_family_core
    elif method == "ADAM":
        from cores.adam import adam_core
        func = adam_core
    elif method == "RMSPROP":
        from cores.rmsprop import rmsprop_core
        func = rmsprop_core
    elif method == "MOMENTUM":
        from cores.momentum import momentum_core
        func = momentum_core

    if not func:
        raise NotImplementedError("method '%s' is not supported" % method)

    logging.info("optimize method=%s parameters=%s" % (method, str(params)))

    free_parameters = []
    return_vals = wrap_core(func, config, params, gradients)
    if type(return_vals) == list and type(return_vals[0]) == list:
        updates, free_parameters = return_vals
    else:
        updates = return_vals

    # No free param recording
    if config and not config.record_free_params:
        free_parameters = []

    # Weight bound (guard config here as well, since config may be None)
    if config and config.weight_bound:
        logging.info("apply weight bound of %.2f" % config.weight_bound)
        new_updates = []
        for param, update_value in updates:
            bounded_value = (update_value * (T.abs_(update_value) <= config.weight_bound) +
                             config.weight_bound * (update_value > config.weight_bound) +
                             -config.weight_bound * (update_value < -config.weight_bound))
            new_updates.append((param, bounded_value))
        updates = new_updates
    return updates, free_parameters

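# Illustrative sketch (an assumption, not from the original file): wiring optimize_updates
# into a hand-rolled Theano training function. The tiny linear model and its variable names
# are hypothetical; only optimize_updates and the dict-to-TrainerConfig behaviour come from
# the function above. Extra keys such as gradient_clipping or weight_l2 could be added to the
# config dict to exercise the clipping and regularization branches.
import numpy as np
import theano
import theano.tensor as T

W = theano.shared(np.zeros((3, 1), dtype="float32"), name="W")   # single weight matrix
x = T.matrix("x")
y = T.matrix("y")
cost = T.sqr(T.dot(x, W) - y).mean()
gradients = T.grad(cost, [W])

# "ADAM" selects adam_core inside optimize_updates; the dict is wrapped into a TrainerConfig.
updates, free_params = optimize_updates([W], gradients, {"method": "ADAM", "learning_rate": 0.001})
train_fn = theano.function([x, y], cost, updates=updates, allow_input_downcast=True)
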
class NeuralTrainer(object):
    '''This is a base class for all trainers.'''

    def __init__(self, network, config=None):
        """
        Basic neural network trainer.
        :type network: deepy.NeuralNetwork
        :type config: deepy.conf.TrainerConfig
        :return:
        """
        super(NeuralTrainer, self).__init__()

        self.config = None
        if isinstance(config, TrainerConfig):
            self.config = config
        elif isinstance(config, dict):
            self.config = TrainerConfig(config)
        else:
            self.config = TrainerConfig()

        self.network = network

        self.network.prepare_training()
        self._setup_costs()

        logging.info("compile evaluation function")
        self.evaluation_func = theano.function(
            network.input_variables + network.target_variables,
            self.evaluation_variables,
            updates=network.updates, allow_input_downcast=True,
            mode=self.config.get("theano_mode", None))
        self.learning_func = None

        self.validation_frequency = self.config.validation_frequency
        self.min_improvement = self.config.min_improvement
        self.patience = self.config.patience

        self.best_cost = 1e100
        self.best_iter = 0
        self.best_params = self._copy_network_params()

    def _setup_costs(self):
        self.cost = self._add_regularization(self.network.cost)
        self.test_cost = self._add_regularization(self.network.test_cost)
        self.training_variables = [self.cost]
        self.training_names = ['J']
        for name, monitor in self.network.training_monitors:
            self.training_names.append(name)
            self.training_variables.append(monitor)
        logging.info("monitor list: %s" % ",".join(self.training_names))

        self.evaluation_variables = [self.test_cost]
        self.evaluation_names = ['J']
        for name, monitor in self.network.testing_monitors:
            self.evaluation_names.append(name)
            self.evaluation_variables.append(monitor)

    def _add_regularization(self, cost):
        if self.config.weight_l1 > 0:
            logging.info("L1 weight regularization: %f" % self.config.weight_l1)
            cost += self.config.weight_l1 * sum(abs(w).sum() for w in self.network.parameters)
        if self.config.hidden_l1 > 0:
            logging.info("L1 hidden unit regularization: %f" % self.config.hidden_l1)
            cost += self.config.hidden_l1 * sum(abs(h).mean(axis=0).sum() for h in self.network._hidden_outputs)
        if self.config.hidden_l2 > 0:
            logging.info("L2 hidden unit regularization: %f" % self.config.hidden_l2)
            cost += self.config.hidden_l2 * sum((h * h).mean(axis=0).sum() for h in self.network._hidden_outputs)
        return cost

    def set_params(self, targets, free_params=None):
        for param, target in zip(self.network.parameters, targets):
            param.set_value(target)
        if free_params:
            for param, param_value in zip(self.network.free_parameters, free_params):
                param.set_value(param_value)

    def save_params(self, path):
        self.set_params(*self.best_params)
        self.network.save_params(path)

    def load_params(self, path):
        self.network.load_params(path)
        self.best_params = self._copy_network_params()

    def _copy_network_params(self):
        checkpoint = (map(lambda p: p.get_value().copy(), self.network.parameters),
                      map(lambda p: p.get_value().copy(), self.network.free_parameters))
        return checkpoint

    def train(self, train_set, valid_set=None, test_set=None, train_size=None):
        """
        Train the model and return costs.
        """
        if not self.learning_func:
            raise NotImplementedError
        iteration = 0
        while True:
            # Test
            if not iteration % self.config.test_frequency and test_set:
                try:
                    self._run_test(iteration, test_set)
                except KeyboardInterrupt:
                    logging.info('interrupted!')
                    break
            # Validate
            if not iteration % self.validation_frequency and valid_set:
                try:
                    if not self._run_valid(iteration, valid_set):
                        logging.info('patience elapsed, bailing out')
                        break
                except KeyboardInterrupt:
                    logging.info('interrupted!')
                    break
            # Train one step
            try:
                costs = self._run_train(iteration, train_set, train_size)
            except KeyboardInterrupt:
                logging.info('interrupted!')
                break
            # Check costs
            if np.isnan(costs[0][1]):
                logging.info("NaN detected in costs, rollback to last parameters")
                self.set_params(*self.checkpoint)
            else:
                iteration += 1
                self.network.epoch_callback()

            yield dict(costs)

        if valid_set and self.config.get("save_best_parameters", True):
            self.set_params(*self.best_params)
        if test_set:
            self._run_test(-1, test_set)

    def _run_test(self, iteration, test_set):
        """
        Run one test iteration.
        """
        costs = self.test_step(test_set)
        info = ' '.join('%s=%.2f' % el for el in costs)
        message = "test (iter=%i) %s" % (iteration + 1, info)
        logging.info(message)
        self.network.train_logger.record(message)

    def _run_train(self, iteration, train_set, train_size=None):
        """
        Run one training iteration.
        """
        costs = self.train_step(train_set, train_size)
        if not iteration % self.config.monitor_frequency:
            info = " ".join("%s=%.2f" % item for item in costs)
            message = "monitor (iter=%i) %s" % (iteration + 1, info)
            logging.info(message)
            self.network.train_logger.record(message)
        return costs

    def _run_valid(self, iteration, valid_set):
        """
        Run one validation iteration; return True if training should continue.
        """
        costs = self.valid_step(valid_set)
        # this is the same as: (J_i - J_f) / J_i > min improvement
        _, J = costs[0]
        if self.best_cost - J > self.best_cost * self.min_improvement:
            self.best_cost = J
            self.best_iter = iteration
            self.best_params = self._copy_network_params()
            marker = ' *'
        else:
            marker = ""
        info = ' '.join('%s=%.2f' % el for el in costs)
        message = "valid (iter=%i) %s%s" % (iteration + 1, info, marker)
        logging.info(message)
        self.network.train_logger.record(message)
        self.checkpoint = self._copy_network_params()
        return iteration - self.best_iter < self.patience

    def test_step(self, test_set):
        costs = list(zip(
            self.evaluation_names,
            np.mean([self.evaluation_func(*x) for x in test_set], axis=0)))
        return costs

    def valid_step(self, valid_set):
        costs = list(zip(
            self.evaluation_names,
            np.mean([self.evaluation_func(*x) for x in valid_set], axis=0)))
        return costs

    def train_step(self, train_set, train_size=None):
        training_callback = bool(self.network.training_callbacks)
        cost_matrix = []
        c = 0
        for x in train_set:
            cost_x = self.learning_func(*x)
            cost_matrix.append(cost_x)
            if training_callback:
                self.network.training_callback()
            if train_size:
                c += 1
                sys.stdout.write("\r> %d%%" % (c * 100 / train_size))
                sys.stdout.flush()
        if train_size:
            sys.stdout.write("\r")
            sys.stdout.flush()
        costs = list(zip(self.training_names, np.mean(cost_matrix, axis=0)))
        return costs

    def run(self, train_set, valid_set=None, test_set=None, train_size=None, controllers=None):
        """
        Run until the end.
        """
        if isinstance(train_set, Dataset):
            dataset = train_set
            train_set = dataset.train_set()
            valid_set = dataset.valid_set()
            test_set = dataset.test_set()
            train_size = dataset.train_size()
        timer = Timer()
        for _ in self.train(train_set, valid_set=valid_set, test_set=test_set, train_size=train_size):
            if controllers:
                ending = False
                for controller in controllers:
                    if hasattr(controller, 'invoke') and controller.invoke():
                        ending = True
                if ending:
                    break
        timer.report()
        return

model.stack(HighwayLayerLRDiagDropoutBatchNorm(activation=activation, gate_bias=gate_bias, projection_dim=d,
                                               d_p_0=dropout_p_h_0, d_p_1=dropout_p_h_1, init=init,
                                               quasi_ortho_init=True))
#model.stack(BatchNormalization(),Dropout(p=dropout_p_2), Dense(10, init=init))
model.stack(Dropout(p=dropout_p_2), Dense(10, init=init))

learning_rate_start = 3e-3
#learning_rate_target = 3e-7
#learning_rate_epochs = 100
#learning_rate_decay = (learning_rate_target / learning_rate_start) ** (1.0 / learning_rate_epochs)

conf = TrainerConfig()
conf.learning_rate = LearningRateAnnealer.learning_rate(learning_rate_start)
#conf.gradient_clipping = 1
conf.patience = 20
#conf.gradient_tolerance = 5
conf.avoid_nan = True
conf.min_improvement = 1e-10

#trainer = MomentumTrainer(model)
trainer = AdamTrainer(model, conf)

mnist = MiniBatches(MnistDataset(), batch_size=100)
#mnist = MiniBatches(MnistDatasetSmallValid(), batch_size=100)

#trainer.run(mnist, controllers=[IncrementalLearningRateAnnealer(trainer, 0, learning_rate_decay)])

class NeuralTrainer(object):
    """
    A base class for all trainers.
    """
    __metaclass__ = ABCMeta

    def __init__(self, network, config=None):
        """
        Basic neural network trainer.
        :type network: deepy.NeuralNetwork
        :type config: deepy.conf.TrainerConfig
        :return:
        """
        super(NeuralTrainer, self).__init__()

        self.config = None
        if isinstance(config, TrainerConfig):
            self.config = config
        elif isinstance(config, dict):
            self.config = TrainerConfig(config)
        else:
            self.config = TrainerConfig()
        # Model and network both refer to the computational graph
        self.model = self.network = network

        self.network.prepare_training()
        self._setup_costs()

        self.evaluation_func = None

        self.validation_frequency = self.config.validation_frequency
        self.min_improvement = self.config.min_improvement
        self.patience = self.config.patience
        self._iter_callbacks = []

        self.best_cost = 1e100
        self.best_iter = 0
        self.best_params = self.copy_params()
        self._skip_batches = 0
        self._progress = 0
        self.last_cost = 0
        self.last_run_costs = None
        self._report_time = True

    def _compile_evaluation_func(self):
        if not self.evaluation_func:
            logging.info("compile evaluation function")
            self.evaluation_func = theano.function(
                self.network.input_variables + self.network.target_variables,
                self.evaluation_variables,
                updates=self.network.updates, allow_input_downcast=True,
                mode=self.config.get("theano_mode", None))

    def skip(self, n_batches):
        """
        Skip N batches in the training.
        """
        logging.info("Skip %d batches" % n_batches)
        self._skip_batches = n_batches

    def _setup_costs(self):
        self.cost = self._add_regularization(self.network.cost)
        self.test_cost = self._add_regularization(self.network.test_cost)
        self.training_variables = [self.cost]
        self.training_names = ['J']
        for name, monitor in self.network.training_monitors:
            self.training_names.append(name)
            self.training_variables.append(monitor)
        logging.info("monitor list: %s" % ",".join(self.training_names))

        self.evaluation_variables = [self.test_cost]
        self.evaluation_names = ['J']
        for name, monitor in self.network.testing_monitors:
            self.evaluation_names.append(name)
            self.evaluation_variables.append(monitor)

    def _add_regularization(self, cost):
        if self.config.weight_l1 > 0:
            logging.info("L1 weight regularization: %f" % self.config.weight_l1)
            cost += self.config.weight_l1 * sum(abs(w).sum() for w in self.network.parameters)
        if self.config.hidden_l1 > 0:
            logging.info("L1 hidden unit regularization: %f" % self.config.hidden_l1)
            cost += self.config.hidden_l1 * sum(abs(h).mean(axis=0).sum() for h in self.network._hidden_outputs)
        if self.config.hidden_l2 > 0:
            logging.info("L2 hidden unit regularization: %f" % self.config.hidden_l2)
            cost += self.config.hidden_l2 * sum((h * h).mean(axis=0).sum() for h in self.network._hidden_outputs)
        return cost

    def set_params(self, targets, free_params=None):
        for param, target in zip(self.network.parameters, targets):
            param.set_value(target)
        if free_params:
            for param, param_value in zip(self.network.free_parameters, free_params):
                param.set_value(param_value)

    def save_params(self, path):
        self.set_params(*self.best_params)
        self.network.save_params(path)

    def load_params(self, path, exclude_free_params=False):
        """
        Load parameters for the training.
        This method can load free parameters and resume the training progress.
        """
        self.network.load_params(path, exclude_free_params=exclude_free_params)
        self.best_params = self.copy_params()
        # Resume the progress
        if self.network.train_logger.progress() > 0:
            self.skip(self.network.train_logger.progress())

    def copy_params(self):
        checkpoint = (map(lambda p: p.get_value().copy(), self.network.parameters),
                      map(lambda p: p.get_value().copy(), self.network.free_parameters))
        return checkpoint

    def add_iter_callback(self, func):
        """
        Add an iteration callback function (the callback receives the trainer as its argument).
        :return:
        """
        self._iter_callbacks.append(func)

    def train(self, train_set, valid_set=None, test_set=None, train_size=None):
        """
        Train the model and return costs.
        """
        epoch = 0
        while True:
            # Test
            if not epoch % self.config.test_frequency and test_set:
                try:
                    self._run_test(epoch, test_set)
                except KeyboardInterrupt:
                    logging.info('interrupted!')
                    break
            # Validate
            if not epoch % self.validation_frequency and valid_set:
                try:
                    if not self._run_valid(epoch, valid_set):
                        logging.info('patience elapsed, bailing out')
                        break
                except KeyboardInterrupt:
                    logging.info('interrupted!')
                    break
            # Train one step
            try:
                costs = self._run_train(epoch, train_set, train_size)
            except KeyboardInterrupt:
                logging.info('interrupted!')
                break
            # Check costs
            if np.isnan(costs[0][1]):
                logging.info("NaN detected in costs, rollback to last parameters")
                self.set_params(*self.checkpoint)
            else:
                epoch += 1
                self.network.epoch_callback()

            yield dict(costs)

        if valid_set and self.config.get("save_best_parameters", True):
            self.set_params(*self.best_params)
        if test_set:
            self._run_test(-1, test_set)

    @abstractmethod
    def learn(self, *variables):
        """
        Update the parameters and return the cost with given data points.
        :param variables:
        :return:
        """

    def _run_test(self, iteration, test_set):
        """
        Run one test iteration.
        """
        costs = self.test_step(test_set)
        info = ' '.join('%s=%.2f' % el for el in costs)
        message = "test (epoch=%i) %s" % (iteration + 1, info)
        logging.info(message)
        self.network.train_logger.record(message)
        self.last_run_costs = costs

    def _run_train(self, iteration, train_set, train_size=None):
        """
        Run one training iteration.
        """
        costs = self.train_step(train_set, train_size)
        if not iteration % self.config.monitor_frequency:
            info = " ".join("%s=%.2f" % item for item in costs)
            message = "monitor (epoch=%i) %s" % (iteration + 1, info)
            logging.info(message)
            self.network.train_logger.record(message)
        self.last_run_costs = costs
        return costs

    def _run_valid(self, iteration, valid_set, dry_run=False):
        """
        Run one validation iteration; return True if training should continue.
        """
        costs = self.valid_step(valid_set)
        # this is the same as: (J_i - J_f) / J_i > min improvement
        _, J = costs[0]
        marker = ""
        if self.best_cost - J > self.best_cost * self.min_improvement:
            # save the best cost and parameters
            self.best_params = self.copy_params()
            marker = ' *'
            if not dry_run:
                self.best_cost = J
                self.best_iter = iteration

            if self.config.auto_save:
                self.network.train_logger.record_progress(self._progress)
                self.network.save_params(self.config.auto_save, new_thread=True)

        info = ' '.join('%s=%.2f' % el for el in costs)
        epoch = "epoch=%d" % (iteration + 1)
        if dry_run:
            epoch = "dryrun" + " " * (len(epoch) - 6)
        message = "valid (%s) %s%s" % (epoch, info, marker)
        logging.info(message)
        self.last_run_costs = costs
        self.network.train_logger.record(message)
        self.checkpoint = self.copy_params()
        return iteration - self.best_iter < self.patience

    def test_step(self, test_set):
        self._compile_evaluation_func()
        costs = list(zip(
            self.evaluation_names,
            np.mean([self.evaluation_func(*x) for x in test_set], axis=0)))
        return costs

    def valid_step(self, valid_set):
        self._compile_evaluation_func()
        costs = list(zip(
            self.evaluation_names,
            np.mean([self.evaluation_func(*x) for x in valid_set], axis=0)))
        return costs

    def train_step(self, train_set, train_size=None):
        dirty_trick_times = 0
        network_callback = bool(self.network.training_callbacks)
        trainer_callback = bool(self._iter_callbacks)
        cost_matrix = []
        self._progress = 0

        for x in train_set:
            if self._skip_batches == 0:
                if dirty_trick_times > 0:
                    cost_x = self.learn(*[t[:(t.shape[0] / 2)] for t in x])
                    cost_matrix.append(cost_x)
                    cost_x = self.learn(*[t[(t.shape[0] / 2):] for t in x])
                    dirty_trick_times -= 1
                else:
                    try:
                        cost_x = self.learn(*x)
                    except MemoryError:
                        logging.info("Memory error was detected, perform dirty trick 30 times")
                        dirty_trick_times = 30
                        # Dirty trick
                        cost_x = self.learn(*[t[:(t.shape[0] / 2)] for t in x])
                        cost_matrix.append(cost_x)
                        cost_x = self.learn(*[t[(t.shape[0] / 2):] for t in x])
                cost_matrix.append(cost_x)
                self.last_cost = cost_x[0]
                if network_callback:
                    self.network.training_callback()
                if trainer_callback:
                    for func in self._iter_callbacks:
                        func(self)
            else:
                self._skip_batches -= 1
            if train_size:
                self._progress += 1
                sys.stdout.write("\x1b[2K\r> %d%% | J=%.2f" % (self._progress * 100 / train_size, self.last_cost))
                sys.stdout.flush()
        self._progress = 0

        if train_size:
            sys.stdout.write("\r")
            sys.stdout.flush()
        costs = list(zip(self.training_names, np.mean(cost_matrix, axis=0)))
        return costs

    def run(self, train_set, valid_set=None, test_set=None, train_size=None, controllers=None):
        """
        Run until the end.
        """
        if isinstance(train_set, Dataset):
            dataset = train_set
            train_set = dataset.train_set()
            valid_set = dataset.valid_set()
            test_set = dataset.test_set()
            train_size = dataset.train_size()
        timer = Timer()
        for _ in self.train(train_set, valid_set=valid_set, test_set=test_set, train_size=train_size):
            if controllers:
                ending = False
                for controller in controllers:
                    if hasattr(controller, 'invoke') and controller.invoke():
                        ending = True
                if ending:
                    break
        if self._report_time:
            timer.report()

if __name__ == '__main__':
    ap = ArgumentParser()
    ap.add_argument("--model", default=os.path.join(os.path.dirname(__file__), "models", "sequence_adding_100_2.gz"))
    args = ap.parse_args()

    model = NeuralRegressor(input_dim=2, input_tensor=3)
    model.stack(IRNN(hidden_size=100, input_type="sequence", output_type="one"),
                Dense(1))

    if os.path.exists(args.model):
        model.load_params(args.model)

    conf = TrainerConfig()
    conf.learning_rate = LearningRateAnnealer.learning_rate(0.01)
    conf.gradient_clipping = 3
    conf.patience = 50
    conf.gradient_tolerance = 5
    conf.avoid_nan = False

    trainer = SGDTrainer(model, conf)

    annealer = LearningRateAnnealer(patience=20)

    trainer.run(batch_set, controllers=[annealer])
    model.save_params(args.model)

    print "Identity matrix weight:"
    print model.first_layer().W_h.get_value().diagonal()

class NeuralTrainer(object):
    """
    A base class for all trainers.
    """
    __metaclass__ = ABCMeta

    def __init__(self, network, config=None):
        """
        Basic neural network trainer.
        :type network: deepy.NeuralNetwork
        :type config: deepy.conf.TrainerConfig
        :return:
        """
        super(NeuralTrainer, self).__init__()

        self.config = None
        if isinstance(config, TrainerConfig):
            self.config = config
        elif isinstance(config, dict):
            self.config = TrainerConfig(config)
        else:
            self.config = TrainerConfig()
        # Model and network both refer to the computational graph
        self.model = self.network = network

        self.network.prepare_training()
        self._setup_costs()

        self.evaluation_func = None

        self.validation_frequency = self.config.validation_frequency
        self.min_improvement = self.config.min_improvement
        self.patience = self.config.patience
        self._iter_callbacks = []

        self.best_cost = 1e100
        self.best_iter = 0
        self.best_params = self.copy_params()
        self._skip_batches = 0
        self._progress = 0
        self.last_cost = 0

    def _compile_evaluation_func(self):
        if not self.evaluation_func:
            logging.info("compile evaluation function")
            self.evaluation_func = theano.function(
                self.network.input_variables + self.network.target_variables,
                self.evaluation_variables,
                updates=self.network.updates, allow_input_downcast=True,
                mode=self.config.get("theano_mode", None))

    def skip(self, n_batches):
        """
        Skip N batches in the training.
        """
        logging.info("Skip %d batches" % n_batches)
        self._skip_batches = n_batches

    def _setup_costs(self):
        self.cost = self._add_regularization(self.network.cost)
        self.test_cost = self._add_regularization(self.network.test_cost)
        self.training_variables = [self.cost]
        self.training_names = ['J']
        for name, monitor in self.network.training_monitors:
            self.training_names.append(name)
            self.training_variables.append(monitor)
        logging.info("monitor list: %s" % ",".join(self.training_names))

        self.evaluation_variables = [self.test_cost]
        self.evaluation_names = ['J']
        for name, monitor in self.network.testing_monitors:
            self.evaluation_names.append(name)
            self.evaluation_variables.append(monitor)

    def _add_regularization(self, cost):
        if self.config.weight_l1 > 0:
            logging.info("L1 weight regularization: %f" % self.config.weight_l1)
            cost += self.config.weight_l1 * sum(abs(w).sum() for w in self.network.parameters)
        if self.config.hidden_l1 > 0:
            logging.info("L1 hidden unit regularization: %f" % self.config.hidden_l1)
            cost += self.config.hidden_l1 * sum(abs(h).mean(axis=0).sum() for h in self.network._hidden_outputs)
        if self.config.hidden_l2 > 0:
            logging.info("L2 hidden unit regularization: %f" % self.config.hidden_l2)
            cost += self.config.hidden_l2 * sum((h * h).mean(axis=0).sum() for h in self.network._hidden_outputs)
        return cost

    def set_params(self, targets, free_params=None):
        for param, target in zip(self.network.parameters, targets):
            param.set_value(target)
        if free_params:
            for param, param_value in zip(self.network.free_parameters, free_params):
                param.set_value(param_value)

    def save_params(self, path):
        self.set_params(*self.best_params)
        self.network.save_params(path)

    def load_params(self, path, exclude_free_params=False):
        """
        Load parameters for the training.
        This method can load free parameters and resume the training progress.
        """
        self.network.load_params(path, exclude_free_params=exclude_free_params)
        self.best_params = self.copy_params()
        # Resume the progress
        if self.network.train_logger.progress() > 0:
            self.skip(self.network.train_logger.progress())

    def copy_params(self):
        checkpoint = (map(lambda p: p.get_value().copy(), self.network.parameters),
                      map(lambda p: p.get_value().copy(), self.network.free_parameters))
        return checkpoint

    def add_iter_callback(self, func):
        """
        Add an iteration callback function (the callback receives the trainer as its argument).
        :return:
        """
        self._iter_callbacks.append(func)

    def train(self, train_set, valid_set=None, test_set=None, train_size=None):
        """
        Train the model and return costs.
        """
        iteration = 0
        while True:
            # Test
            if not iteration % self.config.test_frequency and test_set:
                try:
                    self._run_test(iteration, test_set)
                except KeyboardInterrupt:
                    logging.info('interrupted!')
                    break
            # Validate
            if not iteration % self.validation_frequency and valid_set:
                try:
                    if not self._run_valid(iteration, valid_set):
                        logging.info('patience elapsed, bailing out')
                        break
                except KeyboardInterrupt:
                    logging.info('interrupted!')
                    break
            # Train one step
            try:
                costs = self._run_train(iteration, train_set, train_size)
            except KeyboardInterrupt:
                logging.info('interrupted!')
                break
            # Check costs
            if np.isnan(costs[0][1]):
                logging.info("NaN detected in costs, rollback to last parameters")
                self.set_params(*self.checkpoint)
            else:
                iteration += 1
                self.network.epoch_callback()

            yield dict(costs)

        if valid_set and self.config.get("save_best_parameters", True):
            self.set_params(*self.best_params)
        if test_set:
            self._run_test(-1, test_set)

    @abstractmethod
    def learn(self, *variables):
        """
        Update the parameters and return the cost with given data points.
        :param variables:
        :return:
        """

    def _run_test(self, iteration, test_set):
        """
        Run one test iteration.
        """
        costs = self.test_step(test_set)
        info = ' '.join('%s=%.2f' % el for el in costs)
        message = "test (iter=%i) %s" % (iteration + 1, info)
        logging.info(message)
        self.network.train_logger.record(message)

    def _run_train(self, iteration, train_set, train_size=None):
        """
        Run one training iteration.
        """
        costs = self.train_step(train_set, train_size)
        if not iteration % self.config.monitor_frequency:
            info = " ".join("%s=%.2f" % item for item in costs)
            message = "monitor (iter=%i) %s" % (iteration + 1, info)
            logging.info(message)
            self.network.train_logger.record(message)
        return costs

    def _run_valid(self, iteration, valid_set, dry_run=False):
        """
        Run one validation iteration; return True if training should continue.
        """
        costs = self.valid_step(valid_set)
        # this is the same as: (J_i - J_f) / J_i > min improvement
        _, J = costs[0]
        marker = ""
        if self.best_cost - J > self.best_cost * self.min_improvement:
            # save the best cost and parameters
            self.best_params = self.copy_params()
            marker = ' *'
            if not dry_run:
                self.best_cost = J
                self.best_iter = iteration

            if self.config.auto_save:
                self.network.train_logger.record_progress(self._progress)
                self.network.save_params(self.config.auto_save, new_thread=True)

        info = ' '.join('%s=%.2f' % el for el in costs)
        iter_str = "iter=%d" % (iteration + 1)
        if dry_run:
            iter_str = "dryrun" + " " * (len(iter_str) - 6)
        message = "valid (%s) %s%s" % (iter_str, info, marker)
        logging.info(message)
        self.network.train_logger.record(message)
        self.checkpoint = self.copy_params()
        return iteration - self.best_iter < self.patience

    def test_step(self, test_set):
        self._compile_evaluation_func()
        costs = list(zip(
            self.evaluation_names,
            np.mean([self.evaluation_func(*x) for x in test_set], axis=0)))
        return costs

    def valid_step(self, valid_set):
        self._compile_evaluation_func()
        costs = list(zip(
            self.evaluation_names,
            np.mean([self.evaluation_func(*x) for x in valid_set], axis=0)))
        return costs

    def train_step(self, train_set, train_size=None):
        dirty_trick_times = 0
        network_callback = bool(self.network.training_callbacks)
        trainer_callback = bool(self._iter_callbacks)
        cost_matrix = []
        self._progress = 0

        for x in train_set:
            if self._skip_batches == 0:
                if dirty_trick_times > 0:
                    cost_x = self.learn(*[t[:(t.shape[0] / 2)] for t in x])
                    cost_matrix.append(cost_x)
                    cost_x = self.learn(*[t[(t.shape[0] / 2):] for t in x])
                    dirty_trick_times -= 1
                else:
                    try:
                        cost_x = self.learn(*x)
                    except MemoryError:
                        logging.info("Memory error was detected, perform dirty trick 30 times")
                        dirty_trick_times = 30
                        # Dirty trick
                        cost_x = self.learn(*[t[:(t.shape[0] / 2)] for t in x])
                        cost_matrix.append(cost_x)
                        cost_x = self.learn(*[t[(t.shape[0] / 2):] for t in x])
                cost_matrix.append(cost_x)
                self.last_cost = cost_x[0]
                if network_callback:
                    self.network.training_callback()
                if trainer_callback:
                    for func in self._iter_callbacks:
                        func(self)
            else:
                self._skip_batches -= 1
            if train_size:
                self._progress += 1
                sys.stdout.write("\x1b[2K\r> %d%% | J=%.2f" % (self._progress * 100 / train_size, self.last_cost))
                sys.stdout.flush()
        self._progress = 0

        if train_size:
            sys.stdout.write("\r")
            sys.stdout.flush()
        costs = list(zip(self.training_names, np.mean(cost_matrix, axis=0)))
        return costs

    def run(self, train_set, valid_set=None, test_set=None, train_size=None, controllers=None):
        """
        Run until the end.
        """
        if isinstance(train_set, Dataset):
            dataset = train_set
            train_set = dataset.train_set()
            valid_set = dataset.valid_set()
            test_set = dataset.test_set()
            train_size = dataset.train_size()
        timer = Timer()
        for _ in self.train(train_set, valid_set=valid_set, test_set=test_set, train_size=train_size):
            if controllers:
                ending = False
                for controller in controllers:
                    if hasattr(controller, 'invoke') and controller.invoke():
                        ending = True
                if ending:
                    break
        timer.report()
        return

model = L2HingeNeuralClassifier(input_dim=28*28, last_layer_l2_regularization=l2_reg)
model.stack(Dropout(p=dropout_p_0),
            Dense(n, init=init, disable_bias=True),
            BatchNormalization(),
            Activation(activation))
#model.stack(Dropout(p=dropout_p_0), BatchNormalization())
for _ in range(T):
    #model.stack(HighwayLayerLRDropoutBatchNorm(activation=activation, gate_bias=gate_bias, projection_dim=d, d_p_0=dropout_p_h_0, d_p_1=dropout_p_h_1, init=init))
    model.stack(HighwayLayerLRDiagDropoutBatchNorm(activation=activation, gate_bias=gate_bias, projection_dim=d,
                                                   d_p_0=dropout_p_h_0, d_p_1=dropout_p_h_1, init=init,
                                                   quasi_ortho_init=True))
#model.stack(BatchNormalization(),Dropout(p=dropout_p_2), Dense(10, init=init))
model.stack(Dropout(p=dropout_p_2), Dense(10, init=init))

learning_rate_start = 3e-3
#learning_rate_target = 3e-7
#learning_rate_epochs = 100
#learning_rate_decay = (learning_rate_target / learning_rate_start) ** (1.0 / learning_rate_epochs)

conf = TrainerConfig()
conf.learning_rate = LearningRateAnnealer.learning_rate(learning_rate_start)
#conf.gradient_clipping = 1
conf.patience = 20
#conf.gradient_tolerance = 5
conf.avoid_nan = True
conf.min_improvement = 1e-10

#trainer = MomentumTrainer(model)
trainer = AdamTrainer(model, conf)

mnist = MiniBatches(MnistDataset(), batch_size=100)
#mnist = MiniBatches(MnistDatasetSmallValid(), batch_size=100)

#trainer.run(mnist, controllers=[IncrementalLearningRateAnnealer(trainer, 0, learning_rate_decay)])
trainer.run(mnist, controllers=[LearningRateAnnealer(trainer, 3, 14)])

import logging
import numpy as np

from deepy.layers.recurrent import RecurrentLayer, RecurrentNetwork
from deepy.conf import NetworkConfig, TrainerConfig
from deepy.utils.functions import FLOATX
from deepy import SGDTrainer

logging.basicConfig(level=logging.INFO)

if __name__ == '__main__':
    net_conf = NetworkConfig(input_size=6)
    net_conf.layers = [RecurrentLayer(size=10, activation='sigmoid', bptt=True)]

    trainer_conf = TrainerConfig()
    trainer_conf.learning_rate = 0.03
    trainer_conf.weight_l2 = 0.0001
    trainer_conf.hidden_l2 = 0.0001
    trainer_conf.monitor_frequency = trainer_conf.validation_frequency = trainer_conf.test_frequency = 1

    network = RecurrentNetwork(net_conf)

    trainer = SGDTrainer(network)

    data = np.array([[1,0,0,0,0,0],
                     [0,1,0,0,0,0],
                     [0,0,1,0,0,0],
                     [0,0,0,1,0,0],
                     [0,0,0,0,1,0],
                     [0,0,0,0,0,1],
                     [0,1,0,0,0,0],

batch_set = MiniBatches(dataset, batch_size=32)

if __name__ == '__main__':
    ap = ArgumentParser()
    ap.add_argument("--model", default=os.path.join(os.path.dirname(__file__), "models", "sequence_adding_100_2.gz"))
    args = ap.parse_args()

    model = NeuralRegressor(input_dim=2, input_tensor=3, clip_value=3.)
    model.stack_layer(IRNN(hidden_size=100, output_size=1,
                           input_type="sequence", output_type="one",
                           output_activation="linear"))

    if os.path.exists(args.model):
        model.load_params(args.model)

    conf = TrainerConfig()
    conf.learning_rate = LearningRateAnnealer.learning_rate(0.01)
    conf.max_norm = 1
    conf.patience = 50
    conf.gradient_tolerance = 5

    trainer = SGDTrainer(model, conf)

    annealer = LearningRateAnnealer(trainer, patience=20)

    trainer.run(batch_set, controllers=[annealer])
    model.save_params(args.model)

    print "Identity matrix weight:"
    print model.first_layer().W_h.get_value().diagonal()