示例#1
0
  def on_epoch_begin(self, epoch, logs=None):
    """Applies the user schedule to the learning rate before `epoch` starts.

    The schedule is first called as `schedule(epoch, lr)`; if that raises a
    `TypeError` it is called again as `schedule(epoch)`.

    Args:
      epoch: Index of the epoch that is about to begin.
      logs: Unused.

    Raises:
      ValueError: If no learning rate attribute can be found on the
        optimizer, or if the schedule does not return a float.
    """
    # TODO(yashkatariya): Change the property checking when the learning
    # rate attribute is unified across all TF Optimizers.
    optimizer = self.model.optimizer
    if isinstance(optimizer, optimizers.TFOptimizer):
      opt = optimizer.optimizer
      # NOTE(review): the value is wrapped in a new Variable; confirm that
      # setting it below actually reaches the wrapped TF optimizer.
      if hasattr(opt, '_lr'):
        opt_lr = Variable(opt._lr)  # pylint: disable=protected-access
      elif hasattr(opt, '_learning_rate'):
        opt_lr = Variable(opt._learning_rate)  # pylint: disable=protected-access
      else:
        raise ValueError(
            'TF Optimizer must have a "_lr" or "_learning_rate" attribute.')
    elif hasattr(optimizer, 'lr'):
      opt_lr = optimizer.lr
    else:
      raise ValueError('Optimizer must have a "lr" attribute.')

    try:  # new API
      current_lr = float(K.get_value(opt_lr))
      lr = self.schedule(epoch, current_lr)
    except TypeError:  # Support for old API for backward compatibility
      lr = self.schedule(epoch)

    accepted_types = (float, np.float32, np.float64)
    if not isinstance(lr, accepted_types):
      raise ValueError('The output of the "schedule" function '
                       'should be float.')
    K.set_value(opt_lr, lr)
    if self.verbose > 0:
      print('\nEpoch %05d: LearningRateScheduler reducing learning '
            'rate to %s.' % (epoch + 1, lr))
示例#2
0
  def on_epoch_end(self, epoch, logs=None):
    """Lowers the learning rate once the monitored metric stops improving.

    The current learning rate is always written to `logs['lr']`. When the
    monitored metric is missing from `logs` only a warning is emitted.

    Args:
      epoch: Index of the epoch that just finished.
      logs: Dict of metric values for the epoch; may be None.
    """
    logs = logs or {}
    logs['lr'] = K.get_value(self.model.optimizer.lr)
    current = logs.get(self.monitor)
    if current is None:
      logging.warning('Reduce LR on plateau conditioned on metric `%s` '
                      'which is not available. Available metrics are: %s',
                      self.monitor, ','.join(list(logs.keys())))
      return

    if self.in_cooldown():
      self.cooldown_counter -= 1
      self.wait = 0

    if self.monitor_op(current, self.best):
      # New best value: remember it and restart the patience count.
      self.best = current
      self.wait = 0
      return
    if self.in_cooldown():
      return

    self.wait += 1
    if self.wait >= self.patience:
      old_lr = float(K.get_value(self.model.optimizer.lr))
      if old_lr > self.min_lr:
        # Scale down, but never below the configured floor.
        new_lr = max(old_lr * self.factor, self.min_lr)
        K.set_value(self.model.optimizer.lr, new_lr)
        if self.verbose > 0:
          print('\nEpoch %05d: ReduceLROnPlateau reducing learning '
                'rate to %s.' % (epoch + 1, new_lr))
        self.cooldown_counter = self.cooldown
        self.wait = 0
示例#3
0
  def reset_states(self):
    """Sets every state variable of the metric back to zero.

    Invoked between epochs/steps when a metric is evaluated during
    training.
    """
    for state_var in self.variables:
      K.set_value(state_var, 0)
示例#4
0
 def _set_hyper(self, name, value):
   """Stores hyperparameter `name`; value may be callable, tensor or numeric.

   A hyperparameter that is new, or whose stored value is a callable, a
   tensor or a Python number, is rebound to `value`. Any other stored value
   is updated in place through `backend.set_value`.
   """
   if name in self._hyper:
     stored = self._hyper[name]
     rebind = callable(stored) or isinstance(stored,
                                             (ops.Tensor, int, float))
     if not rebind:
       backend.set_value(self._hyper[name], value)
       return
   self._hyper[name] = value
示例#5
0
 def on_epoch_begin(self, epoch, logs=None):
   """Sets the optimizer learning rate to `schedule(epoch)`.

   Args:
     epoch: Index of the epoch that is about to begin.
     logs: Unused.

   Raises:
     ValueError: If the optimizer has no `lr` attribute, or if the schedule
       does not return a float.
   """
   optimizer = self.model.optimizer
   if not hasattr(optimizer, 'lr'):
     raise ValueError('Optimizer must have a "lr" attribute.')

   new_lr = self.schedule(epoch)
   accepted_types = (float, np.float32, np.float64)
   if not isinstance(new_lr, accepted_types):
     raise ValueError('The output of the "schedule" function '
                      'should be float.')

   K.set_value(optimizer.lr, new_lr)
   if self.verbose > 0:
     print('\nEpoch %05d: LearningRateScheduler reducing learning '
           'rate to %s.' % (epoch + 1, new_lr))
示例#6
0
 def _set_hyper(self, name, value):
   """Stores hyperparameter `name`; value may be callable, tensor or numeric.

   The entry is rebound to `value` when it is new, when the stored value is
   a callable, tensor, Python number or learning rate schedule, or when
   `value` itself is a learning rate schedule. Otherwise the stored object
   is updated in place through `backend.set_value`.
   """
   if name in self._hyper:
     stored = self._hyper[name]
     rebind = (
         callable(stored)
         or isinstance(stored,
                       (ops.Tensor, int, float,
                        learning_rate_schedule.LearningRateSchedule))
         or isinstance(value, learning_rate_schedule.LearningRateSchedule))
     if not rebind:
       backend.set_value(self._hyper[name], value)
       return
   self._hyper[name] = value
示例#7
0
    def on_epoch_end(self, epoch, logs=None):
        """Reduce the learning rate when the monitored metric plateaus.

        Nothing happens while `epoch < self.warmup_steps`. Afterwards the
        metric named by `self.monitor` is compared against `self.best`; once
        it has failed to improve for `self.patience` calls outside of a
        cooldown, the learning rate is multiplied by `self.factor` (bounded
        below by `self.min_lr`) and the cooldown restarts.

        The current learning rate is obtained from
        `self.model._get_lr(step)`. A local `_get_lr` helper that used to be
        defined here was never called (and always returned None because of
        its `finally` clause), so it has been removed.

        Args:
            epoch: Index of the epoch (or step) that just finished.
            logs: Dict of metric values; may be None.
        """
        if epoch < self.warmup_steps:
            return

        logs = logs or {}
        current = logs.get(self.monitor)
        if current is None:
            logging.warning(
                f'ReduceLROnPlateau conditioned on metric'
                f' {self.monitor} which is not available.'
                f' Available metrics are: {",".join(list(logs.keys()))}')
            return

        if self.in_cooldown():
            self.cooldown_counter -= 1
            self.wait = 0
        if self.monitor_op(current, self.best):
            self.best = current
            self.wait = 0
        elif not self.in_cooldown():
            self.wait += 1
            if self.wait >= self.patience:
                step = self.model.optimizer.iterations
                old_lr = self.model._get_lr(step)
                if old_lr > self.min_lr:
                    # Scale down, but never below the configured floor.
                    new_lr = max(old_lr * self.factor, self.min_lr)
                    K.set_value(self.model.optimizer.lr, new_lr)
                    if self.verbose > 0:
                        logging.warning(
                            f'ReduceLROnPlateau (step {epoch}):'
                            ' Reducing learning rate from:'
                            f' {old_lr} to {new_lr:g}.')
                        print(f'current: {current}, best: {self.best}')
                    self.cooldown_counter = self.cooldown
                    self.wait = 0
示例#8
0
 def on_epoch_begin(self, epoch, logs=None):
   """Updates the optimizer learning rate from the user schedule.

   The schedule is first called as `schedule(epoch, lr)`; if that raises a
   `TypeError` it is called again as `schedule(epoch)`.

   Args:
     epoch: Index of the epoch that is about to begin.
     logs: Unused.

   Raises:
     ValueError: If the optimizer has no `lr` attribute, or if the schedule
       does not return a float.
   """
   if not hasattr(self.model.optimizer, 'lr'):
     raise ValueError('Optimizer must have a "lr" attribute.')

   try:  # new API
     current_lr = float(K.get_value(self.model.optimizer.lr))
     new_lr = self.schedule(epoch, current_lr)
   except TypeError:  # Support for old API for backward compatibility
     new_lr = self.schedule(epoch)

   accepted_types = (float, np.float32, np.float64)
   if not isinstance(new_lr, accepted_types):
     raise ValueError('The output of the "schedule" function '
                      'should be float.')

   K.set_value(self.model.optimizer.lr, new_lr)
   if self.verbose > 0:
     print('\nEpoch %05d: LearningRateScheduler reducing learning '
           'rate to %s.' % (epoch + 1, new_lr))
示例#9
0
 def on_epoch_begin(self, epoch, logs=None):
     """Updates the optimizer learning rate from the user schedule.

     The schedule is first called as `schedule(epoch, lr)`; if that raises a
     `TypeError` it is called again as `schedule(epoch)`.

     Args:
         epoch: Index of the epoch that is about to begin.
         logs: Unused.

     Raises:
         ValueError: If the optimizer has no `lr` attribute, or if the
             schedule does not return a float.
     """
     if not hasattr(self.model.optimizer, 'lr'):
         raise ValueError('Optimizer must have a "lr" attribute.')

     try:  # new API
         previous_lr = float(K.get_value(self.model.optimizer.lr))
         scheduled = self.schedule(epoch, previous_lr)
     except TypeError:  # Support for old API for backward compatibility
         scheduled = self.schedule(epoch)

     float_types = (float, np.float32, np.float64)
     if not isinstance(scheduled, float_types):
         raise ValueError('The output of the "schedule" function '
                          'should be float.')

     K.set_value(self.model.optimizer.lr, scheduled)
     if self.verbose > 0:
         print('\nEpoch %05d: LearningRateScheduler reducing learning '
               'rate to %s.' % (epoch + 1, scheduled))
示例#10
0
 def testLRBackwardsCompatibility(self):
   """This tests learning rate getting/setting used by old Keras callbacks."""
   opt = optimizers.Kfac(
       learning_rate=3, damping=5, model=_simple_mlp(), loss='mse')

   def check_both_names(expected):
     # `lr` and `learning_rate` must read back the same value.
     self.assertEqual(backend.get_value(opt.lr), expected)
     self.assertEqual(backend.get_value(opt.learning_rate), expected)

   check_both_names(3)

   # Plain attribute assignment through the legacy name.
   opt.lr = 7
   check_both_names(7)

   # Backend update through the legacy name.
   backend.set_value(opt.lr, 9)
   check_both_names(9)

   # Backend update through the current name.
   backend.set_value(opt.learning_rate, 11)
   check_both_names(11)
 def on_epoch_begin(self, state):
     """Start or finish the alpha fade-in for the current resolution.

     When `state["epoch"]` equals the fade start of the current index, the
     image counters are reset and alpha updates are enabled. When it equals
     the fade start plus the fade duration, alpha updates are disabled, the
     index advances and `self.alpha` is set to 1.0.

     Args:
         state: Dict read for the keys "epoch" and "num_examples".
     """
     # Check whether the current epoch is a boundary of the smooth
     # transition between resolutions.
     fade_epoch = self.fade_start[self._idx]
     current_epoch = state["epoch"]
     if current_epoch == fade_epoch:
         self.nimg_total = self.duration[self._idx] * state["num_examples"]
         self.change_alpha = True
         self.nimg_so_far = 0
         size = 2**(self._idx + 3)
         print("FastEstimator-Alpha: Started fading in for size {}".format(
             size))
     elif current_epoch == fade_epoch + self.duration[self._idx]:
         size = 2**(self._idx + 3)
         print("FastEstimator-Alpha: Finished fading in for size {}".format(
             size))
         self.change_alpha = False
         self._idx += 1
         backend.set_value(self.alpha, 1.0)
示例#12
0
    def train_models(self):
        """Alternately train the style networks and the auto-encoder.

        Each of the `self.N_steps` steps loads a content batch plus the
        current style image. Within every cycle of `self.T + 1` steps, the
        first `self.T` train the current style network and the last one
        trains the auto-encoder and moves on to the next style. Every
        `self.LR_Update_Every` steps the learning rate of all networks is
        multiplied by `self.LR_Decay`. The TensorBoard callbacks are closed
        when the loop finishes.
        """
        style_id = 0
        new_lr = self.LR_Initial
        # Either loss can still be unset the first time progress is printed
        # (for example before any auto-encoder step has run), which used to
        # raise UnboundLocalError. Start from None instead.
        loss_style = None
        loss_autoencoder = None
        for step in range(self.N_steps):
            batch_ids = self.get_batch_ids(self.Batch_Size, self.n_content)
            # Load the DB
            print("Loading DB, step {}...".format(step), end='')
            self.Content_DB = np.array([
                resize(imread(self.Content_DB_list[batch_id]),
                       self.img_shape[1:]) for batch_id in batch_ids
            ])

            # The same style image is repeated for every item of the batch.
            style_im = resize(imread(self.Style_DB_list[style_id]),
                              self.img_shape[1:])
            self.Style_DB = np.array([style_im] * self.Batch_Size)

            print("Finished Loading DB")
            if step % (self.T + 1) != self.T:  # Train Style
                loss_style = self.StyleNet[style_id].train_on_batch(
                    self.Content_DB, self.Style_DB)
                self.TensorBoardStyleNet[style_id].on_epoch_end(
                    step, self.named_logs(self.StyleNet[style_id], loss_style))
            else:  # Train AE
                loss_autoencoder = self.AutoEncoderNet.train_on_batch(
                    self.Content_DB, self.Content_DB)
                self.TensorBoardAutoEncoder.on_epoch_end(
                    step, self.named_logs(self.AutoEncoderNet,
                                          loss_autoencoder))
                style_id = (style_id + 1) % self.n_styles
            if step % self.print_iter == 0 and step != 0:
                print(
                    "step {0}, loss_style={1}, loss_autoencoder={2}, timestamp={3}"
                    .format(step, loss_style, loss_autoencoder,
                            datetime.now()))
            if step % self.LR_Update_Every == 0 and step != 0:
                new_lr = new_lr * self.LR_Decay
                self.LR_Current = new_lr
                for i in self.style_bank:
                    K.set_value(self.StyleNet[i].optimizer.lr, new_lr)
                K.set_value(self.AutoEncoderNet.optimizer.lr, new_lr)
                print("Updating LR to: StyleNet={}, AutoEncoder={}".format(
                    K.eval(self.StyleNet[0].optimizer.lr),
                    K.eval(self.AutoEncoderNet.optimizer.lr)))
        for i in self.style_bank:
            self.TensorBoardStyleNet[i].on_train_end(None)
        self.TensorBoardAutoEncoder.on_train_end(None)
示例#13
0
 def _set_hyper(self, name, value):
     """Stores hyperparameter `name`; value may be callable, tensor, numeric.

     Trackable values are registered under `name` first. The entry is then
     rebound to `value` when it is new, when the stored value is a callable,
     tensor, Python number or learning rate schedule, or when `value` itself
     is a learning rate schedule. Otherwise the stored object is updated in
     place through `backend.set_value`.
     """
     if isinstance(value, trackable.Trackable):
         self._track_trackable(value, name, overwrite=True)

     if name in self._hyper:
         stored = self._hyper[name]
         rebind = (
             callable(stored)
             or isinstance(stored,
                           (ops.Tensor, int, float,
                            learning_rate_schedule.LearningRateSchedule))
             or isinstance(value,
                           learning_rate_schedule.LearningRateSchedule))
         if not rebind:
             backend.set_value(self._hyper[name], value)
             return
     self._hyper[name] = value
 def set_tfidf_data(self, tfidf_data):
   """Validates `tfidf_data` and stores it in `self.tf_idf_weights`.

   Args:
     tfidf_data: Data converted with `self._convert_to_ndarray`; it must end
       up as a 1-D array.

   Raises:
     RuntimeError: If the layer's output mode is not TF-IDF.
     ValueError: If the array is not 1-D, or has more elements than
       `self._max_tokens` allows.
   """
   tfidf_data = self._convert_to_ndarray(tfidf_data)
   if self._output_mode != TFIDF:
     raise RuntimeError(
         "In order to set TF-IDF data, the output mode must be 'tf-idf'.")
   if tfidf_data.ndim != 1:
     raise ValueError("TF-IDF data must be a 1-index array.")

   token_limit = self._max_tokens
   if token_limit is not None:
     num_entries = tfidf_data.shape[0]
     if num_entries > token_limit:
       raise ValueError("The array provided has %d elements. This layer is "
                        "configured to only allow %d elements." %
                        (num_entries, token_limit))
     if num_entries < token_limit:
       # Grow a short array to exactly `token_limit` entries.
       tfidf_data = np.resize(tfidf_data, (token_limit,))
   K.set_value(self.tf_idf_weights, tfidf_data)
 def on_epoch_end(self, loss, epoch):
     """Track the best loss and shrink the learning rate on a plateau.

     A loss strictly below `self.best` becomes the new best and resets the
     wait counter. Otherwise the counter is incremented; once it exceeds
     `self.patience` while the learning rate is above `self.min_lr`, the
     rate is multiplied by `self.factor` (bounded below by `self.min_lr`).

     Args:
         loss: Loss value of the epoch that just finished.
         epoch: Epoch number, used only in the printed message.
     """
     if self.best > loss:
         self.wait = 0
         self.best = loss
         return

     self.wait += 1
     cur_lr = K.get_value(self.optim.lr)
     if cur_lr > self.min_lr and self.wait > self.patience:
         self.wait = 0
         new_lr = max(cur_lr * self.factor, self.min_lr)
         K.set_value(self.optim.lr, new_lr)
         print("Epoch {}: ReduceLROnPlateau reducing learning rate to {}".format(epoch, new_lr))
    def on_epoch_begin(self, epoch, logs=None):
        """Search for the best learning rate before the epoch starts.

        The model weights are saved, then one epoch is trained for each
        candidate learning rate, restoring the saved weights after every
        trial. The candidate with the lowest validation loss is written to
        the optimizer. Candidates combine a global geometric grid over
        `[self.min_lr, self.max_lr]` with a local grid around the current
        learning rate.

        Fixes over the previous version:
          * each candidate is now actually applied to the optimizer before
            its trial (previously every trial ran with the same rate);
          * the `History` object that was on the model before the search is
            restored afterwards (it used to be replaced by the last trial);
          * the diagnostic figure is closed after it is saved.

        Args:
            epoch: Index of the epoch that is about to begin.
            logs: Unused.
        """
        train_gen = TrainDataGenerator(batch_size=self.batch_size,
                                       num_samples=self.num_samples,
                                       rndgray=True)
        val_gen = ValDataGenerator(batch_size=self.batch_size,
                                   rndgray=True)
        self.model.save_weights(self.tmp_weights_path)
        self.losses = []
        # The trial fits below are expected to replace `model.history`;
        # keep the outer one so it can be put back afterwards.
        outer_history = self.model.history
        current_lr = float(K.get_value(self.model.optimizer.lr))
        neighborhood = int(np.ceil(self.neighborhood_fraction * self.num_lr))
        range_scale = self.epoch_range_factor ** epoch
        local_lr_start = current_lr * (10 ** -self.neighborhood_log_range) * range_scale
        local_lr_end = current_lr * (10 ** self.neighborhood_log_range) * range_scale
        global_geomspace = np.geomspace(self.min_lr, self.max_lr, self.num_lr - neighborhood)
        local_geomspace = np.geomspace(local_lr_start, local_lr_end, neighborhood)
        self.learning_rates = np.sort(np.concatenate([global_geomspace, local_geomspace]))

        if self.verbose:
            print('\n--- Searching among {} learning rates ---'.format(self.num_lr))
        for lr in self.learning_rates:
            # Apply the candidate, otherwise all trials are identical.
            K.set_value(self.model.optimizer.lr, lr)
            trial = self.model.fit_generator(generator=train_gen,
                                             validation_data=val_gen,
                                             epochs=1,
                                             use_multiprocessing=True,
                                             workers=12,
                                             verbose=0)
            val_loss = trial.history['val_loss'][0]
            self.losses.append(val_loss)
            if self.verbose:
                print('\t> Validation loss for lr={:.3e}: {:.3f}'.format(lr, val_loss))
            self.model.load_weights(self.tmp_weights_path)

        best_lr = self.learning_rates[np.argmin(self.losses)]
        K.set_value(self.model.optimizer.lr, best_lr)
        self.model.history = outer_history
        if self.verbose:
            print('\n\tBest learning rate: {:.3e}\n'.format(best_lr))

        if self.epoch_lr_graph:
            plt.figure(figsize=(12, 6))
            plt.plot(self.learning_rates[:len(self.losses)], self.losses, '#800000')
            plt.xlabel("Learning Rate")
            plt.ylabel("Loss")
            plt.xscale('log')
            plt.title('Learning Rate Discovery (Epoch {})'.format(epoch + 1))
            plt.savefig(os.path.join(self.visualization_dir,
                                     '{}_epoch_{:0>3d}.jpg'.format(self.visualization_prefix, epoch + 1)))
            # Release the figure; otherwise one figure per epoch stays open.
            plt.close()
def retrain_model(model_file,
                  checkpoint,
                  eval_data,
                  metric,
                  weights_file=None,
                  custom_objects=None,
                  lr=None):
    """
    Load a previously trained model and prepare it for continued training
    Parameters:
        model_file (str): path to model .h5 file
        checkpoint: checkpoint object whose `best` attribute is set to the
            loaded model's score for `metric` on `eval_data`
        eval_data (tf.Dataset): data on which to calculate starting metrics
        metric (str): metric name for checkpoint logging; must be one of the
            loaded model's `metrics_names`
        weights_file (str): path to .hdf5 model weights file
        custom_objects (dict): custom objects passed to `models.load_model`;
            an empty dict is used when omitted
        lr (float): initial learning rate; when omitted (or 0) the learning
            rate stored with the model is left untouched
    Return:
        tuple: (model, checkpoint)
    """
    # load our previously trained model and weights
    m = models.load_model(model_file, custom_objects=custom_objects or {})
    if weights_file:
        m.load_weights(weights_file)

    # set the initial evaluation metric for saving checkpoints to the previous best value
    eval_metrics = m.evaluate(x=eval_data, verbose=1)
    checkpoint.best = eval_metrics[m.metrics_names.index(metric)]

    # set the learning rate for re-training; without an explicit value the
    # previous code read the current rate and wrote it straight back, so
    # skipping the write keeps the same rate.
    if lr:
        backend.set_value(m.optimizer.learning_rate, lr)

    return m, checkpoint
示例#18
0
 def switch(self, switch_flag=None):
     '''
     Switch the phase of the optimizer.

     `K.set_value` is called for its side effect only. Its result used to be
     assigned to `self.beta_g`, which replaced the backend variable with
     `None`; that assignment has been removed.

     Arguments:
         switch_flag: if set `True`, use SGD with nesterov momentum; Otherwise,
         use NAdam/NAmsgrad. If set None, it would switch the phase according to
         the current phase.
     '''
     if switch_flag is None:
         switch_flag = not bool(K.get_value(self.switch_flag))
     else:
         switch_flag = bool(switch_flag)
     if switch_flag:  # using NSGD
         K.set_value(self.beta_g, 1.0)
     else:  # using Nadam
         K.set_value(self.beta_g, 1.0 - K.get_value(self.beta_1))
     K.set_value(self.switch_flag, switch_flag)
示例#19
0
    def on_train_batch_begin(self, batch, logs=None):
        """Update the optimizer learning rate from the per-iteration schedule.

        The schedule is called as
        `schedule(self._total_batches_seen_lr, current_lr)` and must return a
        float or a floating-point tensor.

        Args:
            batch: Index of the batch within the epoch (unused).
            logs: Unused.

        Raises:
            ValueError: If the optimizer has no `lr` attribute or the schedule
                returns a value of the wrong type.
            TypeError: If calling the schedule with two arguments raises a
                `TypeError`; the original error is chained as the cause.
        """
        if not hasattr(self.model.optimizer, 'lr'):
            raise ValueError('Optimizer must have a "lr" attribute.')

        lr_o = float(K.get_value(self.model.optimizer.lr))
        # Only the schedule call is guarded, so a TypeError coming from
        # reading the current rate is no longer reported as a schedule
        # signature problem.
        try:
            lr = self.schedule(self._total_batches_seen_lr, lr_o)
        except TypeError as err:
            raise TypeError('The schedule function accepts two arguments - '
                            'iteration and last learning rate') from err

        if not isinstance(lr, (ops.Tensor, float, np.float32, np.float64)):
            raise ValueError('The output of the "schedule" function '
                             'should be float.')
        if isinstance(lr, ops.Tensor) and not lr.dtype.is_floating:
            raise ValueError('The dtype of Tensor should be float')
        K.set_value(self.model.optimizer.lr, K.get_value(lr))
        if self.verbose > 0 and lr_o != lr:
            print('\nIteration %05d: LearningRateScheduler reducing learning '
                  'rate to %s.' % (self._total_batches_seen_lr + 1, lr))
示例#20
0
 def on_batch_begin(self, batch, logs=None):
     """Apply the per-batch schedule to the optimizer learning rate.

     The schedule is called as `schedule(batch, last_lr)`. The optimizer is
     only touched when the returned value differs from the current rate.

     Args:
         batch: Index of the batch within the current epoch.
         logs: Optional dict; receives the key 'lr' when the rate changes.

     Raises:
         ValueError: If the optimizer has no `lr` attribute, or if the
             schedule does not return a float.
     """
     global_step = self.epoch * self.steps_per_epoch + batch
     optimizer = self.model.optimizer
     if not hasattr(optimizer, 'lr'):
         raise ValueError('Optimizer must have a "lr" attribute.')

     last_lr = float(K.get_value(optimizer.lr))
     if global_step % 1000 == 0:
         print('lr-batch-epoch: %.4f %s %s' % (last_lr, batch, self.epoch))

     lr = self.schedule(batch, last_lr)
     float_types = (float, np.float32, np.float64)
     if not isinstance(lr, float_types):
         raise ValueError('The output of the "schedule" function '
                          'should be float.')

     if last_lr != lr:
         K.set_value(optimizer.lr, lr)
         logs = logs or {}
         logs['lr'] = K.get_value(optimizer.lr)
         if self.verbose > 0:
             print('\nStep %05d: LearningRateScheduler reducing learning '
                   'rate to %s.' % (global_step + 1, lr))
示例#21
0
 def _update_weights(self, x):
     """Write the values in `x` into the trainable weights of the layers.

     Trainable weights are consumed from `x` in layer order; each entry is
     reshaped to the shape of the weight it replaces. A layer whose weights
     are all trainable is updated with a single `set_weights` call; otherwise
     its trainable weights are set one by one.

     Args:
         x: Indexable collection with one entry per trainable weight.
     """
     cursor = -1
     for layer in self._layers:
         trainable_flags = [w.trainable for w in layer.weights]
         set_as_batch = False not in trainable_flags
         pending = []
         for weight in layer.weights:
             if weight.trainable:
                 cursor += 1
                 shape = weight.get_shape()
                 new_value = np.array(x[cursor]).reshape(shape)
                 if set_as_batch:
                     pending.append(new_value)
                 else:
                     K.set_value(weight, new_value)
         if set_as_batch:
             layer.set_weights(pending)
示例#22
0
    def on_epoch_begin(self, epoch, logs=None):
        """Switch training to the next branch every `self.epoch_limit` epochs.

        On a switch epoch, the layers named `<branch_num><suffix>` (one per
        suffix in `self.base_name_arr`) are marked trainable and those of the
        previous branch are marked non-trainable; `self.branch_num` is then
        incremented and the `trainable` state of every layer is printed.

        The layer listing now iterates `self.model.layers`; it used to read
        an undefined global `model`. The mutable default `logs={}` was
        replaced with `None` (the argument is unused).

        Args:
            epoch: Index of the epoch that is about to begin.
            logs: Unused.
        """
        if epoch % self.epoch_limit != 0:
            return

        print("\n\nTRAINING BRANCH " + str(self.branch_num) + "\n\n")
        for n in self.base_name_arr:
            # NOTE(review): this assumes `layer.trainable` is a backend
            # variable; for a plain boolean attribute `K.set_value` would
            # not work -- confirm against the model definition.
            # Set current branch to true
            K.set_value(
                self.model.get_layer(str(self.branch_num) + n).trainable,
                True)
            # Set previous branch to false (if not first branch)
            if self.branch_num > 0:
                K.set_value(
                    self.model.get_layer(str(self.branch_num - 1) +
                                         n).trainable, False)

        self.branch_num += 1
        # Iterate through layers to double check 'trainable'
        # Comment out when debugged
        for layer in self.model.layers:
            print(layer, layer.trainable)
示例#23
0
    def find_generator(
        self, generator, start_lr, end_lr, epochs=1, steps_per_epoch=None, **kw_fit
    ):
        """Run a learning-rate range test on data coming from a generator.

        The learning rate starts at `start_lr` and is multiplied by a
        constant factor after every batch so that it reaches `end_lr` after
        `epochs * steps_per_epoch` batches. Model weights and the original
        learning rate are restored afterwards.

        Args:
            generator: Batch generator passed to `fit_generator`.
            start_lr: Learning rate used for the first batch.
            end_lr: Learning rate targeted for the last batch.
            epochs: Number of epochs to run.
            steps_per_epoch: Batches per epoch; defaults to `len(generator)`.
            **kw_fit: Extra keyword arguments forwarded to `fit_generator`.

        Raises:
            ValueError, NotImplementedError: If `steps_per_epoch` is None and
                `len(generator)` raises one of these; the same exception type
                is re-raised with an explanatory message. (The previous code
                called the exception *instance*, which produced an unrelated
                TypeError instead.)
        """
        if steps_per_epoch is None:
            try:
                steps_per_epoch = len(generator)
            except (ValueError, NotImplementedError) as e:
                raise type(e)(
                    "`steps_per_epoch=None` is only valid for a"
                    " generator based on the "
                    "`keras.utils.Sequence`"
                    " class. Please specify `steps_per_epoch` "
                    "or use the `keras.utils.Sequence` class."
                ) from e
        self.lr_mult = (end_lr / float(start_lr)) ** (1.0 / (epochs * steps_per_epoch))

        # Save weights into a file
        self.model.save_weights("experiments/tmp.h5")

        # Remember the original learning rate
        original_lr = K.get_value(self.model.optimizer.lr)

        # Set the initial learning rate
        K.set_value(self.model.optimizer.lr, start_lr)

        callback = LambdaCallback(
            on_batch_end=lambda batch, logs: self.on_batch_end(batch, logs)
        )
        kw_fit["callbacks"] = kw_fit.get("callbacks", []) + [callback]

        self.model.fit_generator(
            generator=generator,
            epochs=epochs,
            steps_per_epoch=steps_per_epoch,
            **kw_fit
        )

        # Restore the weights to the state before model fitting
        self.model.load_weights("experiments/tmp.h5")
        os.remove("experiments/tmp.h5")

        # Restore the original learning rate
        K.set_value(self.model.optimizer.lr, original_lr)
示例#24
0
    def on_batch_end(self, batch, logs):
        """Record the batch result and raise the learning rate for the next one.

        The current learning rate and loss are appended to `self.lrs` and
        `self.losses`. After the sixth batch, training is stopped when the
        loss is NaN or exceeds `self.best_loss * self.cutoff_coef`. Otherwise
        the best loss is updated and the learning rate is multiplied by
        `self.lr_mult`.

        Args:
            batch: Index of the batch that just finished.
            logs: Dict containing at least the key "loss".
        """
        # Log the learning rate
        current_lr = K.get_value(self.model.optimizer.lr)
        self.lrs.append(current_lr)

        # Log the loss
        batch_loss = logs["loss"]
        self.losses.append(batch_loss)

        # Check whether the loss got too large or NaN
        diverged = batch > 5 and (
            math.isnan(batch_loss)
            or batch_loss > self.best_loss * self.cutoff_coef
        )
        if diverged:
            self.model.stop_training = True
            return

        if batch_loss < self.best_loss:
            self.best_loss = batch_loss

        # Increase the learning rate for the next batch
        current_lr *= self.lr_mult
        K.set_value(self.model.optimizer.lr, current_lr)
示例#25
0
    def __init__(self, model, checkpoint_dir):
        """Set up the checkpoint managers used to back up `model`.

        Args:
            model: Model to checkpoint; its `distribute_strategy` decides
                where this worker writes.
            checkpoint_dir: Base directory; checkpoints are read from its
                'chief' sub-directory.
        """
        self._model = model

        # Epoch at which the checkpoint was saved; used for fault-tolerance.
        # GPU device only has int64 dtype registered VarHandleOp.
        unused_epoch = constant_op.constant(CKPT_SAVED_EPOCH_UNUSED_VALUE,
                                            dtype=dtypes.int64)
        self._ckpt_saved_epoch = variables.Variable(
            initial_value=unused_epoch, name='ckpt_saved_epoch')

        # Variable initialization.
        backend.set_value(self._ckpt_saved_epoch,
                          CKPT_SAVED_EPOCH_UNUSED_VALUE)

        # _ckpt_saved_epoch is tracked by the checkpoint, so it is included
        # in the checkpoint file when backing up.
        checkpoint = trackable_util.Checkpoint(
            model=self._model, ckpt_saved_epoch=self._ckpt_saved_epoch)

        # Single-worker training: the write and read managers share the same
        # checkpoint_dir.
        #
        # Multi-worker training, on a worker that should not save
        # checkpoints: the write manager gets a temp filepath instead, so it
        # writes to a file that will be removed at the end of the back_up()
        # call. This is necessary because the SyncOnReadVariable needs to be
        # synced across all the workers in order to be read, and all workers
        # need to perform `save()`.
        # All workers still restore from the same checkpoint_dir, the one
        # given to read_checkpoint_manager.
        chief_dir = os.path.join(checkpoint_dir, 'chief')
        self.read_checkpoint_manager = checkpoint_management.CheckpointManager(
            checkpoint, directory=chief_dir, max_to_keep=1)

        write_checkpoint_dir = distributed_file_utils.write_dirpath(
            checkpoint_dir, self._model.distribute_strategy)
        if not self._model.distribute_strategy.extended.should_checkpoint:
            self.write_checkpoint_manager = checkpoint_management.CheckpointManager(
                checkpoint, directory=write_checkpoint_dir, max_to_keep=1)
        else:
            self.write_checkpoint_manager = self.read_checkpoint_manager
示例#26
0
    def train(self):
        """Train and evaluate for `self.epochs` epochs, then save the weights.

        Every training batch first sets the optimizer learning rate to
        `self.learning_rate`, runs `self.train_step` and reports progress;
        every test batch runs `self.test_step` and reports progress. The
        model summary is printed and the weights are saved at the end.
        """
        train_ds, test_ds = self.loadData()
        train_template = 'Training Epoch: {} || learning rate: {} || Loss: {} || Accuracy: {}%'
        test_template = 'Testing  Epoch: {} || Loss: {} || Accuracy: {}%'

        print("start training ....")
        for epoch in range(self.epochs):
            epoch_label = epoch + 1

            train_processbar = ProcessBar()
            train_processbar.count = self.train_num
            for images, labels in train_ds:
                K.set_value(self.optimizer.lr, self.learning_rate)
                train_processbar.start_time = time.time()
                self.train_step(images, labels)
                train_message = train_template.format(
                    epoch_label,
                    format(self.optimizer.lr.numpy(), '.5f'),
                    format(self.train_loss.result(), '.2f'),
                    format(self.train_accuracy.result() * 100, '.2f'))
                NoLinePrint(train_message, train_processbar)

            print("")
            test_processbar = ProcessBar()
            test_processbar.count = self.test_num
            for test_images, test_labels in test_ds:
                test_processbar.start_time = time.time()
                self.test_step(test_images, test_labels)
                test_message = test_template.format(
                    epoch_label,
                    format(self.test_loss.result(), '.2f'),
                    format(self.test_accuracy.result() * 100, '.2f'))
                NoLinePrint(test_message, test_processbar)
            print("")

        save_path = CreateSavePath("/home/jade/Models/" + self.dataset_name +
                                   "Classify/")
        self.model.summary()
        weights_path = (save_path + self.dataset_name + '_vgg16net' + "_" +
                        GetToday())
        self.model.save_weights(weights_path, save_format='tf')
示例#27
0
    def on_epoch_end(self, epoch, logs=None):
        """Reduce the learning rate on plateau, after an initial delay.

        While `self.delayed_phase` is set the call is only counted; the
        delayed phase ends once `self.delay` calls have been seen. After
        that, the learning rate is multiplied by `self.factor` (bounded below
        by `self.min_lr`) whenever the monitored metric has not improved for
        `self.patience` epochs outside of a cooldown.

        The "metric not available" message is now interpolated before
        printing; it used to pass logging-style arguments to `print`, which
        printed the raw `%s` placeholders followed by the arguments.

        Args:
            epoch: Index of the epoch that just finished.
            logs: Dict of metric values; may be None.
        """
        # Additional code for delay
        if self.delayed_phase:
            self.n_empty_calls += 1
            if self.n_empty_calls == self.delay:
                self.delayed_phase = False
            return

        # Original callback code
        logs = logs or {}
        logs['lr'] = K.get_value(self.model.optimizer.lr)
        current = logs.get(self.monitor)
        if current is None:
            print(
                'Reduce LR on plateau conditioned on metric `%s` '
                'which is not available. Available metrics are: %s' %
                (self.monitor, ','.join(list(logs.keys()))))
            return

        if self.in_cooldown():
            self.cooldown_counter -= 1
            self.wait = 0

        if self.monitor_op(current, self.best):
            self.best = current
            self.wait = 0
        elif not self.in_cooldown():
            self.wait += 1
            if self.wait >= self.patience:
                old_lr = float(K.get_value(self.model.optimizer.lr))
                if old_lr > self.min_lr:
                    # Scale down, but never below the configured floor.
                    new_lr = max(old_lr * self.factor, self.min_lr)
                    K.set_value(self.model.optimizer.lr, new_lr)
                    if self.verbose > 0:
                        print(
                            '\nEpoch %05d: ReduceLROnPlateau reducing learning '
                            'rate to %s.' % (epoch + 1, new_lr))
                    self.cooldown_counter = self.cooldown
                    self.wait = 0
示例#28
0
def bidirectional_model():
    """Build and compile the stacked bidirectional-LSTM sequence model.

    The model is assembled from module-level configuration: an embedding
    layer whose weights are initialised from `word2vec`,
    `parameters.rnn_layers` bidirectional LSTM layers, the `simple_context`
    Lambda layer, a time-distributed dense layer with one unit per
    vocabulary row, and a softmax activation. It is compiled with
    categorical cross-entropy and Adam, after which the optimizer's
    learning rate is overwritten with `parameters.learning_rate`.

    Returns:
      The compiled `Sequential` model.
    """
    length_vocab, embedding_size = word2vec.shape

    model = Sequential()
    model.add(
        Embedding(length_vocab,
                  embedding_size,
                  input_length=parameters.max_length,
                  weights=[word2vec],
                  mask_zero=True,
                  name='embedding_layer'))

    for i in range(parameters.rnn_layers):
        bilstm = Bidirectional(
            LSTM(parameters.rnn_size,
                 return_sequences=True,
                 name='bilstm_layer_%d' % (i + 1)))
        model.add(bilstm)

    model.add(
        Lambda(simple_context,
               mask=lambda inputs, mask: mask[:, parameters.max_len_desc:],
               output_shape=lambda input_shape:
               (input_shape[0], parameters.max_len_head, 2 *
                (parameters.rnn_size - parameters.activation_rnn_size)),
               name='simple_context_layer'))

    # The projection width is the vocabulary size, i.e. the row count of
    # `word2vec` that was already unpacked above.
    model.add(TimeDistributed(Dense(length_vocab,
                                    name='time_distributed_layer')))

    model.add(Activation('softmax', name='activation_layer'))
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    K.set_value(model.optimizer.lr, np.float32(parameters.learning_rate))
    # summary() prints the table itself and returns None; wrapping it in
    # print() only appended a stray "None" line.
    model.summary()

    return model
示例#29
0
 def on_train_batch_end(self, batch, logs=None):
     """Recompute and apply the learning rate every `update_interval` batches.

     For batch indices below 100 the rate is `self.initial_lr`. Afterwards
     it decays from `self.initial_lr` towards `self.final_lr` over
     `self.total_step` steps: geometrically when `self.decay_type` is
     "exp", linearly (floored at `self.final_lr`) otherwise.

     Args:
       batch: Index of the batch that just finished.
       logs: Unused.

     Raises:
       ValueError: If the optimizer has no `learning_rate` attribute.
     """
     if batch % self.update_interval != 0:
         return

     optimizer = self.model.optimizer
     # Validate the attribute that is actually read and written below
     # (the previous check looked for `lr` instead).
     if not hasattr(optimizer, 'learning_rate'):
         raise ValueError('Optimizer must have a "learning_rate" attribute.')
     try:  # new API
         # The value read here is overwritten below; the read is kept so a
         # TypeError from it still triggers the fallback branch.
         lr = float(K.get_value(optimizer.learning_rate))
         if batch < 100:
             lr = self.initial_lr
         else:
             if self.decay_type == "exp":
                 lr_decay = (self.final_lr / self.initial_lr)**(
                     1. / (self.total_step - 1))
                 lr = self.initial_lr * (lr_decay**batch)
             else:
                 ratio = max((self.total_step - batch - 1.) /
                             (self.total_step - 1.), 0.)
                 lr = self.final_lr + (self.initial_lr -
                                       self.final_lr) * ratio
             print(f'\n[UPDATE] Step {batch+1}, lr = {lr}')
     except TypeError as err:  # Support for old API for backward compatibility
         lr = self.initial_lr
         # Report the caught exception, not the TypeError class object.
         print(f"There is a TypeError: {err}")
     K.set_value(optimizer.learning_rate, lr)
示例#30
0
    def on_epoch_end(self, epoch, logs=None):
        """Shift `self.alpha` down and `self.beta` up after each epoch.

        Nothing happens on the last epoch (`self.total_epochs - 1`). On
        every other epoch alpha is lowered by
        `(alpha - least_wt) / total_epochs * (epoch + 1)` and beta is raised
        by `(max_wt - beta) / total_epochs * (epoch + 1)`; both new values
        are written back with `K.set_value` and logged.

        Args:
          epoch: Index of the epoch that just ended.
          logs: Unused.
        """
        if epoch == self.total_epochs - 1:
            return

        print('\n-------------------------------CALL BACK RECEIVED ------------------------')
        print(f'\n---------------------------EPOCH IS {epoch} ------------------------------\n')

        epochs_done = epoch + 1
        beta_delta = (self.max_wt - self.beta) / self.total_epochs * epochs_done
        alpha_delta = (self.alpha - self.least_wt) / self.total_epochs * epochs_done

        # Both targets are materialised as backend variables before either
        # weight is overwritten, so each delta is based on the old values.
        alpha_target = K.variable(K.get_value(self.alpha) - alpha_delta)
        beta_target = K.variable(K.get_value(self.beta) + beta_delta)

        K.set_value(self.alpha, K.get_value(alpha_target))
        K.set_value(self.beta, K.get_value(beta_target))

        logging.info("epoch %s, alpha = %s, beta = %s" % (epoch, K.get_value(self.alpha), K.get_value(self.beta)))

        print('Done setting')
示例#31
0
def create_model():
    """Build and compile the stacked GRU sequence model.

    The model is assembled from module-level configuration: an embedding
    layer whose weights are initialised from `word2vec`,
    `parameters.rnn_layers` GRU layers, the `simple_context` Lambda layer,
    a time-distributed dense layer with one unit per vocabulary row, and a
    softmax activation. It is compiled with categorical cross-entropy and
    Adam, after which the optimizer's learning rate is overwritten with
    `parameters.learning_rate`.

    Returns:
      The compiled `Sequential` model.
    """
    length_vocab, embedding_size = word2vec.shape
    print("shape of word2vec matrix ", word2vec.shape)

    model = Sequential()
    model.add(
        Embedding(length_vocab,
                  embedding_size,
                  input_length=parameters.max_length,
                  weights=[word2vec],
                  mask_zero=True,
                  name='embedding_layer'))

    for i in range(parameters.rnn_layers):
        gru = GRU(parameters.rnn_size,
                  return_sequences=True,
                  name='gru_layer_%d' % (i + 1))

        model.add(gru)

    model.add(
        Lambda(simple_context,
               mask=lambda inputs, mask: mask[:, parameters.max_len_desc:],
               output_shape=output_shape_simple_context_layer,
               name='simple_context_layer'))

    # The projection width is the vocabulary size, i.e. the row count of
    # `word2vec` that was already unpacked above.
    model.add(TimeDistributed(Dense(length_vocab,
                                    name='time_distributed_layer')))

    model.add(Activation('softmax', name='activation_layer'))
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    K.set_value(model.optimizer.lr, np.float32(parameters.learning_rate))
    # summary() prints the table itself and returns None; wrapping it in
    # print() only appended a stray "None" line.
    model.summary()

    return model
示例#32
0
def train(train_generator,
          model_train,
          iter_n,
          max_iter,
          val_generator=None,
          val_steps=None,
          logger=None):
    """Run one epoch of training and return the resulting model weights.

    The learning rate is first looked up via `update_learning_rate`; when
    it differs from `config.learning_rate` it is written to the optimizer.
    The model is then fitted for the single epoch `iter_n`
    (`initial_epoch=iter_n`, `epochs=iter_n + 1`).

    Args:
      train_generator: Generator passed to `fit_generator`.
      model_train: Compiled model exposing `optimizer.lr`, `fit_generator`
        and `get_weights`.
      iter_n: Index of the epoch to run.
      max_iter: Forwarded to `update_learning_rate`.
      val_generator: Optional validation generator; validation is skipped
        when it is None.
      val_steps: Validation steps, used only together with `val_generator`.
      logger: Optional logger; learning-rate changes are reported to it
        when provided.

    Returns:
      The list returned by `model_train.get_weights()` after fitting.
    """
    init_lr = config.learning_rate
    # Update learning rate
    schedule = {"step": {"80": 5e-5, "160": 1e-5, "200": 1e-6}}
    update_lr = update_learning_rate(schedule, init_lr, iter_n, max_iter)
    if update_lr != init_lr:
        K.set_value(model_train.optimizer.lr, update_lr)
        # `logger` defaults to None, so only report when one was supplied
        # instead of failing with AttributeError.
        if logger is not None:
            logger.info('=> updated learning rate: {}'.format(
                K.get_value(model_train.optimizer.lr)))

    fit_kwargs = dict(steps_per_epoch=config.iterations,
                      epochs=iter_n + 1,
                      initial_epoch=iter_n,
                      verbose=2,
                      workers=config.workers)
    # Validation arguments are passed only when a validation generator
    # exists, matching the two separate calls this replaces.
    if val_generator is not None:
        fit_kwargs.update(validation_data=val_generator,
                          validation_steps=val_steps)
    model_train.fit_generator(train_generator, **fit_kwargs)

    return model_train.get_weights()
示例#33
0
    def _byteps_average_metrics_in_place(self, logs):
        """Replace every metric in `logs` with its average across workers.

        Each metric is summed across workers with `bps.push_pull` and then
        divided by `self._allreduce_ranks`, which is set from `bps.size()`
        while it is still <= 1. In graph mode one variable and one reduce
        op are created per metric name on first use and reused afterwards.

        Args:
          logs: Dict mapping metric names to values; updated in place. A
            falsy value is replaced by a fresh local dict.
        """
        logs = logs or {}
        reduced_logs = {}
        import byteps.tensorflow as bps
        # Imported once here rather than on every loop iteration.
        from tensorflow.python.eager import context

        if self._allreduce_ranks <= 1.:
            self._allreduce_ranks = float(bps.size())
        # Reduce every metric among workers. Sort metrics by name
        # to ensure consistent order.
        for metric, value in sorted(logs.items()):
            if context.executing_eagerly():
                with tf.device(self._device):
                    reduced_logs[metric] = bps.push_pull(
                        K.constant(value, name=metric),
                        op=bps.ops.ReduceOps.Sum).numpy()
            else:
                # Test membership in the same cache that is written and
                # read below (`_m_vars`), so each metric's variable and
                # reduce op are built only once.
                if metric not in self._m_vars:
                    with tf.name_scope('MetricAverageCallback') as scope:
                        var = tf.Variable(value, name=metric)
                        K.get_session().run(var.initializer)
                        self._m_vars[metric] = var
                        self._allreduce_ops[metric] = bps.push_pull(
                            var,
                            scope,
                            device_dense=self._device,
                            op=bps.ops.ReduceOps.Sum)
                else:
                    K.set_value(self._m_vars[metric], value)
                reduced_logs[metric] = K.get_session().run(
                    self._allreduce_ops[metric])

        # Override the reduced values back into logs dictionary
        # for other callbacks to use.
        for metric, value in reduced_logs.items():
            logs[metric] = value / self._allreduce_ranks
示例#34
0
    def schedule(optimizer, current_step):
        """Write the learning rate for `current_step` to `optimizer.lr`.

        Steps below `warmup_steps` get `base_lr * step / warmup_steps`.
        Later steps get the entry of `values` paired with the first entry
        of `boundaries` that is >= the step, or `values[-1]` when the step
        lies beyond every boundary. Steps below 1 are treated as step 1.
        """
        step = max(1, current_step)

        if step < warmup_steps:
            target = base_lr * step / warmup_steps
        else:
            for position, bound in enumerate(boundaries):
                if step <= bound:
                    target = values[position]
                    break
            else:
                # Past the last boundary: use the final value.
                target = values[-1]

        K.set_value(optimizer.lr, K.get_value(target))
示例#35
0
    def on_epoch_end(self, epoch, logs=None):
        """Reduce the learning rate on plateau, after an optional warmup.

        The current learning rate is stored in `logs['lr']`. While
        `epoch < self.warmup` the rate is set to `init_lr` scaled by a
        log-spaced factor that ends at 1, and plateau tracking is skipped.
        On the first call in that phase `init_lr` is captured from the
        optimizer and `self.warmup_rate` is replaced by the array of
        per-epoch factors. From epoch `self.warmup` onwards the usual logic
        applies: after `self.patience` epochs without improvement and
        outside cooldown, the rate is multiplied by `self.factor` and
        clamped from below at `self.min_lr`.

        Args:
          epoch: Index of the epoch that just ended.
          logs: Optional dict of epoch metrics; `logs['lr']` is written to it.
        """
        logs = logs or {}
        logs['lr'] = K.get_value(self.model.optimizer.lr)
        current = logs.get(self.monitor)
        if current is None:
            logging.warning('Reduce LR on plateau conditioned on metric `%s` '
                            'which is not available. Available metrics are: %s',
                            self.monitor, ','.join(list(logs.keys())))
            return

        if epoch < self.warmup:
            if self.init_lr is None:
                self.init_lr = float(K.get_value(self.model.optimizer.lr))
                self.warmup_rate = np.logspace(np.log10(self.warmup_rate), 0, self.warmup)
            K.set_value(self.model.optimizer.lr, self.init_lr * self.warmup_rate[epoch])
            return
        # Restore the base rate only if warmup actually captured one. With
        # `warmup == 0` no warmup epoch ever runs, `init_lr` is still None,
        # and writing it would corrupt the optimizer's learning rate.
        if epoch == self.warmup and self.init_lr is not None:
            K.set_value(self.model.optimizer.lr, self.init_lr)

        if self.in_cooldown():
            self.cooldown_counter -= 1
            self.wait = 0

        if self.monitor_op(current, self.best):
            self.best = current
            self.wait = 0
        elif not self.in_cooldown():
            self.wait += 1
            if self.wait >= self.patience:
                old_lr = float(K.get_value(self.model.optimizer.lr))
                if old_lr > self.min_lr:
                    new_lr = max(old_lr * self.factor, self.min_lr)
                    K.set_value(self.model.optimizer.lr, new_lr)
                    if self.verbose > 0:
                        print('\nEpoch %05d: ReduceLROnPlateau reducing learning '
                              'rate to %s.' % (epoch + 1, new_lr))
                    self.cooldown_counter = self.cooldown
                    self.wait = 0
 def on_epoch_begin(self, epoch, logs=None):
     """Set `self.alpha` to `2 / (1 + exp(-0.1 * epoch)) - 1` and print it.

     Args:
       epoch: Index of the epoch about to start.
       logs: Unused; defaults to None rather than a shared mutable dict.
     """
     temp_lambda = 2/(1+np.exp(-0.1*epoch))-1
     print(temp_lambda)
     K.set_value(self.alpha, temp_lambda)
示例#37
0
  def test_dynamic_loss_scaling(self, strategy_fn, cloning=True):
    """Checks weight updates of a model compiled with a dynamic loss scale.

    A float16 model is trained with SGD wrapped in a `LossScaleOptimizer`
    that uses `DynamicLossScale`. Across successive `fit` calls the test
    asserts the value of `layer.v`, adjusting the gradient expected by the
    in-model gradient check and toggling NaN gradients along the way.

    Args:
      strategy_fn: Callable returning the distribution strategy to use.
      cloning: Forwarded to `model.compile`.
    """
    strategy = strategy_fn()
    initial_loss_scale = 2.
    batch_size = 4
    # Gradient value the in-model check compares against; it is updated
    # below whenever the loss scale is expected to change.
    expected_gradient = backend.variable([initial_loss_scale / batch_size],
                                         dtype=dtypes.float16)
    # If this variable is set to True, the model below will have NaN gradients
    have_nan_gradients = backend.variable(False, dtype=dtypes.bool)
    with strategy.scope():
      with policy.policy_scope(policy.Policy('infer_float32_vars')):
        x = layers.Input(shape=(1,), batch_size=batch_size,
                         dtype=dtypes.float16)
        layer = AddLayer(assert_type=dtypes.float16)
        y = layer(x)
        identity_with_nan_grads = (
            mp_test_util.create_identity_with_nan_gradients_fn(
                have_nan_gradients))
        y = core.Lambda(identity_with_nan_grads)(y)
        identity_with_grad_check_fn = (
            mp_test_util.create_identity_with_grad_check_fn(
                expected_dtype=dtypes.float16,
                expected_gradient=expected_gradient))
        y = core.Lambda(identity_with_grad_check_fn)(y)
        y = math_ops.cast(y, dtypes.float32)
        model = models.Model(inputs=x, outputs=y)

        def loss_fn(y_true, y_pred):
          # The labels are ignored; the loss is the mean prediction.
          del y_true
          return math_ops.reduce_mean(y_pred)

        opt = gradient_descent.SGD(1.)
        loss_scale = loss_scale_module.DynamicLossScale(
            initial_loss_scale=initial_loss_scale, increment_period=2)
        opt = loss_scale_optimizer.LossScaleOptimizer(opt, loss_scale)
        model.compile(opt, loss=loss_fn, cloning=cloning)

    self.assertEqual(backend.eval(layer.v), 1)
    x = np.ones((batch_size, 1))
    y = np.ones((batch_size, 1))
    # A single batch, so every `fit` call below performs one training step.
    dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).batch(batch_size)
    model.fit(dataset)
    # The variable starts at 1 and has a gradient of 1, so it will go down
    # by 1 each step.
    self.assertEqual(backend.eval(layer.v), 0)

    model.fit(dataset)
    self.assertEqual(backend.eval(layer.v), -1)

    # There have been two steps without NaNs, so the loss scale will double
    backend.set_value(expected_gradient,
                      backend.get_value(expected_gradient * 2))
    model.fit(dataset)
    self.assertEqual(backend.eval(layer.v), -2)

    # Next test with NaN gradients.
    backend.set_value(have_nan_gradients, True)
    model.fit(dataset)
    # Variable should not be updated
    self.assertEqual(backend.eval(layer.v), -2)

    # Test with finite gradients again
    backend.set_value(have_nan_gradients, False)
    # The loss scale will be halved due to the NaNs, so the gradient will also
    # be halved
    backend.set_value(expected_gradient,
                      backend.get_value(expected_gradient / 2))
    model.fit(dataset)
    self.assertEqual(backend.eval(layer.v), -3)
示例#38
0
 def reset_states(self):
     """Overwrite the `true_positives` state with zero."""
     counter = self.true_positives
     K.set_value(counter, 0)
示例#39
0
 def reset_states(self):
   """Overwrite the `true_positives` state with zero."""
   counter = self.true_positives
   K.set_value(counter, 0)
  def reset_states(self, states=None):
    """Resets the layer's recorded states, or sets them to given values.

    The state shape is derived from `compute_output_shape` applied to the
    first input spec, with the channel axis replaced by each entry of
    `self.cell.state_size`. If the states have not been created yet they are
    created as zeros, regardless of `states`. Otherwise they are zeroed
    when `states` is None, or overwritten with `states` after validation.

    Args:
      states: Optional value, or list/tuple of values, to assign to the
        states. A single value is wrapped in a list.

    Raises:
      AttributeError: If the layer is not stateful.
      ValueError: If the state shape contains None, if the number of values
        in `states` differs from the number of states, or if a value's
        shape does not match the expected state shape.
      KeyError: If `self.cell.data_format` is neither 'channels_first' nor
        'channels_last'.
    """
    if not self.stateful:
      raise AttributeError('Layer must be stateful.')
    input_shape = self.input_spec[0].shape
    state_shape = self.compute_output_shape(input_shape)
    # With return_state the computed shape is a sequence; keep its first
    # entry only.
    if self.return_state:
      state_shape = state_shape[0]
    # With return_sequences, drop axis 1 from the shape (presumably the
    # time axis -- confirm against compute_output_shape).
    if self.return_sequences:
      state_shape = state_shape[:1].concatenate(state_shape[2:])
    if None in state_shape:
      raise ValueError('If a RNN is stateful, it needs to know '
                       'its batch size. Specify the batch size '
                       'of your input tensors: \n'
                       '- If using a Sequential model, '
                       'specify the batch size by passing '
                       'a `batch_input_shape` '
                       'argument to your first layer.\n'
                       '- If using the functional API, specify '
                       'the time dimension by passing a '
                       '`batch_shape` argument to your Input layer.\n'
                       'The same thing goes for the number of rows and '
                       'columns.')

    # helper function
    def get_tuple_shape(nb_channels):
      # Copy of `state_shape` with the channel axis (index 1 for
      # channels_first, index 3 for channels_last) set to `nb_channels`.
      result = list(state_shape)
      if self.cell.data_format == 'channels_first':
        result[1] = nb_channels
      elif self.cell.data_format == 'channels_last':
        result[3] = nb_channels
      else:
        raise KeyError
      return tuple(result)

    # initialize state if None
    if self.states[0] is None:
      # `state_size` may be a sized collection (one entry per state) or a
      # single value; the `__len__` check distinguishes the two.
      if hasattr(self.cell.state_size, '__len__'):
        self.states = [K.zeros(get_tuple_shape(dim))
                       for dim in self.cell.state_size]
      else:
        self.states = [K.zeros(get_tuple_shape(self.cell.state_size))]
    elif states is None:
      # States exist and no values were supplied: zero them in place.
      if hasattr(self.cell.state_size, '__len__'):
        for state, dim in zip(self.states, self.cell.state_size):
          K.set_value(state, np.zeros(get_tuple_shape(dim)))
      else:
        K.set_value(self.states[0],
                    np.zeros(get_tuple_shape(self.cell.state_size)))
    else:
      # Explicit values supplied: validate count and shapes, then assign.
      if not isinstance(states, (list, tuple)):
        states = [states]
      if len(states) != len(self.states):
        raise ValueError('Layer ' + self.name + ' expects ' +
                         str(len(self.states)) + ' states, ' +
                         'but it received ' + str(len(states)) +
                         ' state values. Input received: ' + str(states))
      for index, (value, state) in enumerate(zip(states, self.states)):
        if hasattr(self.cell.state_size, '__len__'):
          dim = self.cell.state_size[index]
        else:
          dim = self.cell.state_size
        if value.shape != get_tuple_shape(dim):
          raise ValueError('State ' + str(index) +
                           ' is incompatible with layer ' +
                           self.name + ': expected shape=' +
                           str(get_tuple_shape(dim)) +
                           ', found shape=' + str(value.shape))
        # TODO(anjalisridhar): consider batch calls to `set_value`.
        K.set_value(state, value)