def on_epoch_begin(self, epoch, logs=None):
    """Run the user schedule and write the resulting learning rate.

    Args:
        epoch: Index of the epoch that is about to start.
        logs: Unused.

    Raises:
        ValueError: If the optimizer exposes no learning-rate attribute, or
            if the schedule does not return a float.
    """
    # TODO(yashkatariya): Change the property checking when the learning
    # rate attribute is unified across all TF Optimizers.
    optimizer = self.model.optimizer
    if isinstance(optimizer, optimizers.TFOptimizer):
        opt = optimizer.optimizer
        has_lr = hasattr(opt, '_lr')
        if not has_lr and not hasattr(opt, '_learning_rate'):
            raise ValueError(
                'TF Optimizer must have a "_lr" or "_learning_rate" attribute.')
        # NOTE(review): the value is wrapped in a fresh Variable, and that
        # wrapper is what gets read and written below - confirm this is
        # meant to reach the wrapped optimizer.
        if has_lr:
            opt_lr = Variable(opt._lr)  # pylint: disable=protected-access
        else:
            opt_lr = Variable(opt._learning_rate)  # pylint: disable=protected-access
    else:
        if not hasattr(optimizer, 'lr'):
            raise ValueError('Optimizer must have a "lr" attribute.')
        opt_lr = optimizer.lr

    try:  # new API: schedule(epoch, current_lr)
        lr = self.schedule(epoch, float(K.get_value(opt_lr)))
    except TypeError:  # Support for old API for backward compatibility
        lr = self.schedule(epoch)

    if not isinstance(lr, (float, np.float32, np.float64)):
        raise ValueError('The output of the "schedule" function '
                         'should be float.')
    K.set_value(opt_lr, lr)
    if self.verbose > 0:
        print('\nEpoch %05d: LearningRateScheduler reducing learning '
              'rate to %s.' % (epoch + 1, lr))
def on_epoch_end(self, epoch, logs=None):
    """Lower the learning rate once the monitored metric stops improving.

    Records the current learning rate under ``logs['lr']``, then updates the
    wait/cooldown counters and multiplies the learning rate by
    ``self.factor`` (never going below ``self.min_lr``) after
    ``self.patience`` epochs without improvement.
    """
    logs = logs or {}
    optimizer = self.model.optimizer
    logs['lr'] = K.get_value(optimizer.lr)

    current = logs.get(self.monitor)
    if current is None:
        logging.warning('Reduce LR on plateau conditioned on metric `%s` '
                        'which is not available. Available metrics are: %s',
                        self.monitor, ','.join(list(logs.keys())))
        return

    if self.in_cooldown():
        self.cooldown_counter -= 1
        self.wait = 0

    if self.monitor_op(current, self.best):
        # New best value: remember it and restart the patience window.
        self.best = current
        self.wait = 0
        return
    if self.in_cooldown():
        return

    self.wait += 1
    if self.wait >= self.patience:
        old_lr = float(K.get_value(optimizer.lr))
        if old_lr > self.min_lr:
            new_lr = max(old_lr * self.factor, self.min_lr)
            K.set_value(optimizer.lr, new_lr)
            if self.verbose > 0:
                print('\nEpoch %05d: ReduceLROnPlateau reducing learning '
                      'rate to %s.' % (epoch + 1, new_lr))
            self.cooldown_counter = self.cooldown
            self.wait = 0
def reset_states(self):
    """Set every variable in ``self.variables`` back to zero.

    This function is called between epochs/steps, when a metric is
    evaluated during training.
    """
    for state_var in self.variables:
        K.set_value(state_var, 0)
def _set_hyper(self, name, value):
    """Store `value` as hyperparameter `name`.

    A new name is stored directly. An existing entry that is a callable,
    tensor, int or float is replaced; any other existing entry is updated
    in place through `backend.set_value`.
    """
    hyper = self._hyper
    if name not in hyper:
        hyper[name] = value
        return

    current = hyper[name]
    replaceable = callable(current) or isinstance(
        current, (ops.Tensor, int, float))
    if replaceable:
        hyper[name] = value
    else:
        backend.set_value(hyper[name], value)
def on_epoch_begin(self, epoch, logs=None):
    """Set the optimizer learning rate to ``self.schedule(epoch)``.

    Raises:
        ValueError: If the optimizer has no ``lr`` attribute or the schedule
            does not return a float.
    """
    optimizer = self.model.optimizer
    if not hasattr(optimizer, 'lr'):
        raise ValueError('Optimizer must have a "lr" attribute.')

    lr = self.schedule(epoch)
    accepted_types = (float, np.float32, np.float64)
    if not isinstance(lr, accepted_types):
        raise ValueError('The output of the "schedule" function '
                         'should be float.')

    K.set_value(optimizer.lr, lr)
    if self.verbose > 0:
        print('\nEpoch %05d: LearningRateScheduler reducing learning '
              'rate to %s.' % (epoch + 1, lr))
def _set_hyper(self, name, value):
    """Store `value` as hyperparameter `name`.

    A new name is stored directly. The existing entry is replaced when it is
    a callable, tensor, int, float or `LearningRateSchedule`, or when the
    new value is a `LearningRateSchedule`; otherwise it is updated in place
    through `backend.set_value`.
    """
    hyper = self._hyper
    if name not in hyper:
        hyper[name] = value
        return

    schedule_type = learning_rate_schedule.LearningRateSchedule
    current = hyper[name]
    replaceable = (
        callable(current)
        or isinstance(current, (ops.Tensor, int, float, schedule_type))
        or isinstance(value, schedule_type))
    if replaceable:
        hyper[name] = value
    else:
        backend.set_value(hyper[name], value)
def on_epoch_end(self, epoch, logs=None):
    """Reduce the learning rate on a plateau, after a warm-up period.

    Nothing happens while ``epoch < self.warmup_steps``. Afterwards the
    monitored metric drives the wait/cooldown counters, and after
    ``self.patience`` calls without improvement the learning rate is
    multiplied by ``self.factor`` (never going below ``self.min_lr``).

    Args:
        epoch: Index passed by the training loop (the log message reports it
            as a step).
        logs: Metric dict; must contain ``self.monitor`` to have any effect.
    """
    if epoch < self.warmup_steps:
        return

    # The previous version defined a nested `_get_lr` helper here that was
    # never called and whose `finally: lr = None` discarded every value it
    # read. It has been removed; the learning rate is read through
    # `self.model._get_lr`, exactly as before.
    logs = logs or {}
    current = logs.get(self.monitor)
    if current is None:
        logging.warning(
            f'ReduceLROnPlateau conditioned on metric'
            f' {self.monitor} which is not available.'
            f' Available metrics are: {",".join(list(logs.keys()))}')
        return

    if self.in_cooldown():
        self.cooldown_counter -= 1
        self.wait = 0

    if self.monitor_op(current, self.best):
        self.best = current
        self.wait = 0
    elif not self.in_cooldown():
        self.wait += 1
        if self.wait >= self.patience:
            step = self.model.optimizer.iterations
            old_lr = self.model._get_lr(step)
            if old_lr > self.min_lr:
                new_lr = max(old_lr * self.factor, self.min_lr)
                K.set_value(self.model.optimizer.lr, new_lr)
                if self.verbose > 0:
                    logging.warning(
                        f'ReduceLROnPlateau (step {epoch}):'
                        ' Reducing learning rate from:'
                        f' {old_lr} to {new_lr:g}.')
                    print(f'current: {current}, best: {self.best}')
                self.cooldown_counter = self.cooldown
                self.wait = 0
def on_epoch_begin(self, epoch, logs=None):
    """Run the schedule and write its result to ``optimizer.lr``.

    The schedule is first called as ``schedule(epoch, current_lr)``; if that
    raises ``TypeError`` it is called again as ``schedule(epoch)``.

    Raises:
        ValueError: If the optimizer has no ``lr`` attribute or the schedule
            does not return a float.
    """
    optimizer = self.model.optimizer
    if not hasattr(optimizer, 'lr'):
        raise ValueError('Optimizer must have a "lr" attribute.')

    try:  # new API
        lr = self.schedule(epoch, float(K.get_value(optimizer.lr)))
    except TypeError:  # Support for old API for backward compatibility
        lr = self.schedule(epoch)

    accepted_types = (float, np.float32, np.float64)
    if not isinstance(lr, accepted_types):
        raise ValueError('The output of the "schedule" function '
                         'should be float.')

    K.set_value(optimizer.lr, lr)
    if self.verbose > 0:
        print('\nEpoch %05d: LearningRateScheduler reducing learning '
              'rate to %s.' % (epoch + 1, lr))
def on_epoch_begin(self, epoch, logs=None):
    """Update ``optimizer.lr`` from the schedule at the start of an epoch.

    Tries ``schedule(epoch, current_lr)`` first and falls back to
    ``schedule(epoch)`` when the call raises ``TypeError``.

    Raises:
        ValueError: If the optimizer has no ``lr`` attribute or the schedule
            does not return a float.
    """
    opt = self.model.optimizer
    if not hasattr(opt, 'lr'):
        raise ValueError('Optimizer must have a "lr" attribute.')

    try:  # new API
        current_lr = float(K.get_value(opt.lr))
        next_lr = self.schedule(epoch, current_lr)
    except TypeError:  # Support for old API for backward compatibility
        next_lr = self.schedule(epoch)

    if not isinstance(next_lr, (float, np.float32, np.float64)):
        raise ValueError('The output of the "schedule" function '
                         'should be float.')

    K.set_value(opt.lr, next_lr)
    if self.verbose > 0:
        print('\nEpoch %05d: LearningRateScheduler reducing learning '
              'rate to %s.' % (epoch + 1, next_lr))
def testLRBackwardsCompatibility(self):
    """This tests learning rate getting/setting used by old Keras callbacks."""
    opt = optimizers.Kfac(
        learning_rate=3, damping=5, model=_simple_mlp(), loss='mse')

    def expect_both(expected):
        # `lr` and `learning_rate` must always report the same value.
        self.assertEqual(backend.get_value(opt.lr), expected)
        self.assertEqual(backend.get_value(opt.learning_rate), expected)

    expect_both(3)

    # Plain attribute assignment through the legacy name.
    opt.lr = 7
    expect_both(7)

    # Backend assignment through the legacy name.
    backend.set_value(opt.lr, 9)
    expect_both(9)

    # Backend assignment through the current name.
    backend.set_value(opt.learning_rate, 11)
    expect_both(11)
def on_epoch_begin(self, state):
    """Start or finish the fade-in phase for the current resolution index.

    Args:
        state: Dict read for ``"epoch"`` and, when fading starts,
            ``"num_examples"``.
    """
    # Check whether the current epoch starts or ends a smooth transition
    # between resolutions.
    idx = self._idx
    epoch = state["epoch"]
    fade_epoch = self.fade_start[idx]

    if epoch == fade_epoch:
        self.nimg_total = self.duration[idx] * state["num_examples"]
        self.change_alpha = True
        self.nimg_so_far = 0
        print("FastEstimator-Alpha: Started fading in for size {}".format(
            2**(idx + 3)))
    elif epoch == fade_epoch + self.duration[idx]:
        print("FastEstimator-Alpha: Finished fading in for size {}".format(
            2**(idx + 3)))
        self.change_alpha = False
        self._idx += 1
        backend.set_value(self.alpha, 1.0)
def train_models(self):
    """Alternate style-branch and auto-encoder training for ``N_steps``.

    Each step loads a content batch and one style image. Steps where
    ``step % (T + 1) != T`` train ``StyleNet[style_id]``; the remaining
    steps train the auto-encoder. Losses are forwarded to the TensorBoard
    callbacks, and every ``LR_Update_Every`` steps the learning rate of all
    networks is multiplied by ``LR_Decay``.
    """
    style_id = 0
    new_lr = self.LR_Initial
    # Bound up front: the progress print below can fire before one of the
    # two branches has ever run, which used to raise UnboundLocalError.
    loss_style = None
    loss_autoencoder = None

    for step in range(self.N_steps):
        style_ids = [style_id] * self.Batch_Size
        batch_ids = self.get_batch_ids(self.Batch_Size, self.n_content)

        # Load the DB
        print("Loading DB, step {}...".format(step), end='')
        self.Content_DB = np.array([
            resize(imread(self.Content_DB_list[batch_id]), self.img_shape[1:])
            for batch_id in batch_ids
        ])
        style_im = resize(imread(self.Style_DB_list[style_id]),
                          self.img_shape[1:])
        self.Style_DB = np.array([style_im for _ in style_ids])
        print("Finished Loading DB")

        if step % (self.T + 1) != self.T:
            # Train Style
            loss_style = self.StyleNet[style_id].train_on_batch(
                self.Content_DB, self.Style_DB)
            self.TensorBoardStyleNet[style_id].on_epoch_end(
                step, self.named_logs(self.StyleNet[style_id], loss_style))
        else:
            # Train AE
            loss_autoencoder = self.AutoEncoderNet.train_on_batch(
                self.Content_DB, self.Content_DB)
            self.TensorBoardAutoEncoder.on_epoch_end(
                step, self.named_logs(self.AutoEncoderNet, loss_autoencoder))

        # NOTE(review): the style index is advanced on every step here; the
        # flattened original does not show whether this belonged to the
        # auto-encoder branch only - confirm against the upstream file.
        style_id = (style_id + 1) % self.n_styles

        if step % self.print_iter == 0 and step != 0:
            print(
                "step {0}, loss_style={1}, loss_autoencoder={2}, timestamp={3}"
                .format(step, loss_style, loss_autoencoder, datetime.now()))

        if step % self.LR_Update_Every == 0 and step != 0:
            new_lr = new_lr * self.LR_Decay
            self.LR_Current = new_lr
            for i in self.style_bank:
                K.set_value(self.StyleNet[i].optimizer.lr, new_lr)
            K.set_value(self.AutoEncoderNet.optimizer.lr, new_lr)
            print("Updating LR to: StyleNet={}, AutoEncoder={}".format(
                K.eval(self.StyleNet[0].optimizer.lr),
                K.eval(self.AutoEncoderNet.optimizer.lr)))

    for i in self.style_bank:
        self.TensorBoardStyleNet[i].on_train_end(None)
    self.TensorBoardAutoEncoder.on_train_end(None)
def _set_hyper(self, name, value):
    """Store `value` as hyperparameter `name`.

    Trackable values are registered with `_track_trackable` first. A new
    name is stored directly. The existing entry is replaced when it is a
    callable, tensor, int, float or `LearningRateSchedule`, or when the new
    value is a `LearningRateSchedule`; otherwise it is updated in place
    through `backend.set_value`.
    """
    if isinstance(value, trackable.Trackable):
        self._track_trackable(value, name, overwrite=True)

    hyper = self._hyper
    if name not in hyper:
        hyper[name] = value
        return

    schedule_type = learning_rate_schedule.LearningRateSchedule
    current = hyper[name]
    replaceable = (
        callable(current)
        or isinstance(current, (ops.Tensor, int, float, schedule_type))
        or isinstance(value, schedule_type))
    if replaceable:
        hyper[name] = value
    else:
        backend.set_value(hyper[name], value)
def set_tfidf_data(self, tfidf_data):
    """Validate `tfidf_data` and write it into ``self.tf_idf_weights``.

    Raises:
        RuntimeError: If the layer's output mode is not TF-IDF.
        ValueError: If the data is not one-dimensional, or has more elements
            than ``self._max_tokens`` allows.
    """
    weights = self._convert_to_ndarray(tfidf_data)
    if self._output_mode != TFIDF:
        raise RuntimeError(
            "In order to set TF-IDF data, the output mode must be 'tf-idf'.")
    if weights.ndim != 1:
        raise ValueError("TF-IDF data must be a 1-index array.")

    limit = self._max_tokens
    if limit is not None:
        count = weights.shape[0]
        if count > limit:
            raise ValueError("The array provided has %d elements. This layer is "
                             "configured to only allow %d elements." %
                             (count, limit))
        if count < limit:
            # np.resize fills the extra slots by repeating the input.
            weights = np.resize(weights, (limit,))

    K.set_value(self.tf_idf_weights, weights)
def on_epoch_end(self, loss, epoch):
    """Track the best loss and reduce ``self.optim.lr`` on a plateau.

    A loss below ``self.best`` resets the wait counter. Otherwise the
    counter is incremented and, once it exceeds ``self.patience`` while the
    learning rate is above ``self.min_lr``, the rate is multiplied by
    ``self.factor`` (never going below ``self.min_lr``).
    """
    if self.best > loss:
        self.wait = 0
        self.best = loss
        return

    self.wait += 1
    current_lr = K.get_value(self.optim.lr)
    if current_lr > self.min_lr and self.wait > self.patience:
        self.wait = 0
        reduced_lr = max(current_lr * self.factor, self.min_lr)
        K.set_value(self.optim.lr, reduced_lr)
        print("Epoch {}: ReduceLROnPlateau reducing learning rate to {}".format(epoch, reduced_lr))
    return
def on_epoch_begin(self, epoch, logs=None):
    """Pick the learning rate for this epoch by a short trial search.

    Candidates combine a global geometric grid between ``min_lr`` and
    ``max_lr`` with a local grid around the current learning rate. For each
    candidate the model is trained for one epoch from the same saved
    weights, and the candidate with the lowest validation loss is kept.

    Fixes over the previous version:
      * each candidate is now written to the optimizer before its trial
        (previously every trial ran with the same learning rate);
      * the model's original ``history`` is restored afterwards (it used to
        be overwritten by the last trial's history);
      * the diagnostic figure is closed after saving.
    """
    train_gen = TrainDataGenerator(batch_size=self.batch_size,
                                   num_samples=self.num_samples,
                                   rndgray=True)
    val_gen = ValDataGenerator(batch_size=self.batch_size, rndgray=True)

    self.model.save_weights(self.tmp_weights_path)
    self.losses = []
    original_history = self.model.history

    current_lr = float(K.get_value(self.model.optimizer.lr))
    neighborhood = int(np.ceil(self.neighborhood_fraction * self.num_lr))
    range_scale = self.epoch_range_factor ** (epoch)
    local_lr_start = current_lr * (10 ** -self.neighborhood_log_range) * range_scale
    local_lr_end = current_lr * (10 ** self.neighborhood_log_range) * range_scale
    global_geomspace = np.geomspace(self.min_lr, self.max_lr,
                                    self.num_lr - neighborhood)
    local_geomspace = np.geomspace(local_lr_start, local_lr_end, neighborhood)
    self.learning_rates = np.sort(
        np.concatenate([global_geomspace, local_geomspace]))

    if self.verbose:
        print('\n--- Searching among {} learning rates ---'.format(self.num_lr))

    for lr in self.learning_rates:
        # Apply the candidate so the trial actually measures it.
        K.set_value(self.model.optimizer.lr, lr)
        trial_history = self.model.fit_generator(generator=train_gen,
                                                 validation_data=val_gen,
                                                 epochs=1,
                                                 use_multiprocessing=True,
                                                 workers=12,
                                                 verbose=0)
        val_loss = trial_history.history['val_loss'][0]
        self.losses.append(val_loss)
        if self.verbose:
            print('\t> Validation loss for lr={:.3e}: {:.3f}'.format(lr, val_loss))
        # Every trial starts from the same weights.
        self.model.load_weights(self.tmp_weights_path)

    best_lr = self.learning_rates[np.argmin(self.losses)]
    K.set_value(self.model.optimizer.lr, best_lr)
    self.model.history = original_history

    if self.verbose:
        print('\n\tBest learning rate: {:.3e}\n'.format(best_lr))

    if self.epoch_lr_graph:
        figure = plt.figure(figsize=(12, 6))
        plt.plot(self.learning_rates[:len(self.losses)], self.losses, '#800000')
        plt.xlabel("Learning Rate")
        plt.ylabel("Loss")
        plt.xscale('log')
        plt.title('Learning Rate Discovery (Epoch {})'.format(epoch + 1))
        plt.savefig(os.path.join(
            self.visualization_dir,
            '{}_epoch_{:0>3d}.jpg'.format(self.visualization_prefix, epoch + 1)))
        # Release the figure; one is created per epoch.
        plt.close(figure)
def retrain_model(model_file, checkpoint, eval_data, metric, weights_file=None, custom_objects=None, lr=None):
    """Load a previously trained model and prepare it for further training.

    Parameters:
        model_file (str): path to model .h5 file
        checkpoint: checkpoint callback whose ``best`` attribute is seeded
            with the loaded model's current score on ``eval_data``
        eval_data (tf.Dataset): data on which to calculate starting metrics
        metric (str): metric name for checkpoint logging; must be one of the
            model's ``metrics_names``
        weights_file (str): optional path to .hdf5 model weights file
        custom_objects (dict): optional custom objects needed to load the
            model
        lr (float): learning rate to train with; when omitted (or falsy) the
            optimizer's current learning rate is kept

    Return:
        tuple: ``(model, checkpoint)``
    """
    # The previous version defined two nested loss wrappers here that were
    # never used and referenced an undefined name `weight`; they have been
    # removed.
    m = models.load_model(model_file, custom_objects=custom_objects or {})
    if weights_file:
        m.load_weights(weights_file)

    # Seed the checkpoint's best value with the loaded model's score.
    evalMetrics = m.evaluate(x=eval_data, verbose=1)
    index = m.metrics_names.index(metric)
    checkpoint.best = evalMetrics[index]

    # Only touch the optimizer when a new learning rate was requested;
    # writing the current value back to itself was a no-op.
    if lr:
        backend.set_value(m.optimizer.learning_rate, lr)
    return m, checkpoint
def switch(self, switch_flag=None):
    ''' Switch the phase of the optimizer.
    Arguments:
        switch_flag: if set `True`, use SGD with nesterov momentum;
            Otherwise, use NAdam/NAmsgrad. If set None, it would switch
            the phase according to the current phase.
    '''
    if switch_flag is None:
        switch_flag = not bool(K.get_value(self.switch_flag))
    else:
        switch_flag = bool(switch_flag)

    # K.set_value updates the variable in place and returns nothing. The
    # previous code assigned that result back to self.beta_g, replacing the
    # variable with None after the first call.
    if switch_flag:  # using NSGD
        K.set_value(self.beta_g, 1.0)
    else:  # using Nadam
        K.set_value(self.beta_g, 1.0 - K.get_value(self.beta_1))
    K.set_value(self.switch_flag, bool(switch_flag))
def on_train_batch_begin(self, batch, logs=None):
    """Apply the per-iteration schedule before a training batch.

    The schedule is called as ``schedule(iteration, last_lr)`` and may
    return a Python/NumPy float or a floating-point Tensor.

    Raises:
        ValueError: If the optimizer has no ``lr`` attribute, or the schedule
            returns a non-float value or a non-floating Tensor.
        TypeError: If calling the schedule with two arguments fails; the
            original error is attached as the cause.
    """
    if not hasattr(self.model.optimizer, 'lr'):
        raise ValueError('Optimizer must have a "lr" attribute.')
    try:  # new API
        lr_o = float(K.get_value(self.model.optimizer.lr))
        lr = self.schedule(self._total_batches_seen_lr, lr_o)
    except TypeError as err:
        # Keep the underlying error visible instead of masking it.
        raise TypeError('The schedule function accepts two arguments - '
                        'iteration and last learning rate') from err
    if not isinstance(lr, (ops.Tensor, float, np.float32, np.float64)):
        raise ValueError('The output of the "schedule" function '
                         'should be float.')
    if isinstance(lr, ops.Tensor) and not lr.dtype.is_floating:
        raise ValueError('The dtype of Tensor should be float')

    # Resolve a possible Tensor once, so the comparison and the message
    # below work on a concrete number.
    new_lr = K.get_value(lr)
    K.set_value(self.model.optimizer.lr, new_lr)
    if self.verbose > 0 and lr_o != new_lr:
        print('\nIteration %05d: LearningRateScheduler reducing learning '
              'rate to %s.' % (self._total_batches_seen_lr + 1, new_lr))
def on_batch_begin(self, batch, logs=None):
    """Apply the batch-level schedule and report learning-rate changes.

    Raises:
        ValueError: If the optimizer has no ``lr`` attribute or the schedule
            does not return a float.
    """
    global_step = self.epoch * self.steps_per_epoch + batch
    optimizer = self.model.optimizer
    if not hasattr(optimizer, 'lr'):
        raise ValueError('Optimizer must have a "lr" attribute.')

    last_lr = float(K.get_value(optimizer.lr))
    if global_step % 1000 == 0:
        print('lr-batch-epoch: %.4f %s %s' % (last_lr, batch, self.epoch))

    lr = self.schedule(batch, last_lr)
    if not isinstance(lr, (float, np.float32, np.float64)):
        raise ValueError('The output of the "schedule" function '
                         'should be float.')

    if last_lr == lr:
        return

    # NOTE(review): logging and the verbose message are treated as part of
    # the "rate changed" branch; the flattened original does not show the
    # nesting - confirm against the upstream file.
    K.set_value(optimizer.lr, lr)
    logs = logs or {}
    logs['lr'] = K.get_value(optimizer.lr)
    if self.verbose > 0:
        print('\nStep %05d: LearningRateScheduler reducing learning '
              'rate to %s.' % (global_step + 1, lr))
def _update_weights(self, x):
    """Write the entries of `x` into the trainable weights, in order.

    `x` is consumed one entry per trainable weight, walking ``self._layers``
    and each layer's ``weights`` in order; every entry is reshaped to the
    target weight's shape. A layer with no weight marked
    ``trainable == False`` is updated with one ``set_weights`` call;
    otherwise its trainable weights are set one at a time.
    """
    next_index = 0
    for layer in self._layers:
        weights = layer.weights
        batch_update = False not in [w.trainable for w in weights]
        pending = []
        for weight in weights:
            if not weight.trainable:
                continue
            value = np.array(x[next_index]).reshape(weight.get_shape())
            next_index += 1
            if batch_update:
                pending.append(value)
            else:
                K.set_value(weight, value)
        if batch_update:
            layer.set_weights(pending)
def on_epoch_begin(self, epoch, logs=None):
    """Move training to the next branch every ``self.epoch_limit`` epochs.

    On a switch epoch the layers of branch ``self.branch_num`` (one per
    suffix in ``self.base_name_arr``) are marked trainable, the layers of
    the previous branch are marked non-trainable, and the branch counter is
    advanced.

    Args:
        epoch: Index of the epoch that is about to start.
        logs: Unused. Defaults to None instead of a shared mutable dict.
    """
    if epoch % self.epoch_limit == 0:
        print("\n\nTRAINING BRANCH " + str(self.branch_num) + "\n\n")
        for n in self.base_name_arr:
            # Set current branch to true
            # NOTE(review): assumes `layer.trainable` is a backend variable
            # here; K.set_value on a plain bool would fail - confirm.
            K.set_value(
                self.model.get_layer(str(self.branch_num) + n).trainable, True)
            # Set previous branch to false (if not first branch)
            if self.branch_num > 0:
                K.set_value(
                    self.model.get_layer(str(self.branch_num - 1) + n).trainable,
                    False)
        self.branch_num += 1
        # Iterate through layers to double check 'trainable'.
        # Comment out when debugged.
        # Uses self.model: the bare name `model` is not defined in this
        # method.
        for layer in self.model.layers:
            print(layer, layer.trainable)
def find_generator(
    self, generator, start_lr, end_lr, epochs=1, steps_per_epoch=None, **kw_fit
):
    """Run a learning-rate range test driven by a data generator.

    The learning rate starts at `start_lr` and is multiplied by
    ``self.lr_mult`` after every batch (see ``self.on_batch_end``). Model
    weights and the optimizer learning rate are restored afterwards, even if
    fitting raises.

    Args:
        generator: Data generator passed to ``fit_generator``.
        start_lr: Learning rate used for the first batch.
        end_lr: Learning rate targeted for the last batch.
        epochs: Number of epochs to sweep over.
        steps_per_epoch: Batches per epoch; when None, ``len(generator)`` is
            used.
        **kw_fit: Extra keyword arguments for ``fit_generator``.

    Raises:
        TypeError, ValueError, NotImplementedError: If `steps_per_epoch` is
            None and the generator's length cannot be determined. The
            exception has the same type as the underlying failure.
    """
    if steps_per_epoch is None:
        try:
            steps_per_epoch = len(generator)
        except (TypeError, ValueError, NotImplementedError) as e:
            # The previous code did `raise e(...)`, which tried to call the
            # exception instance; it also missed the TypeError that len()
            # raises for plain generators.
            raise type(e)(
                "`steps_per_epoch=None` is only valid for a"
                " generator based on the "
                "`keras.utils.Sequence`"
                " class. Please specify `steps_per_epoch` "
                "or use the `keras.utils.Sequence` class."
            ) from e

    self.lr_mult = (end_lr / float(start_lr)) ** (1.0 / (epochs * steps_per_epoch))

    # Save weights into a file
    weights_path = "experiments/tmp.h5"
    self.model.save_weights(weights_path)

    # Remember the original learning rate
    original_lr = K.get_value(self.model.optimizer.lr)

    # Set the initial learning rate
    K.set_value(self.model.optimizer.lr, start_lr)

    callback = LambdaCallback(
        on_batch_end=lambda batch, logs: self.on_batch_end(batch, logs)
    )
    # Accept a missing, None, list or tuple `callbacks` argument.
    kw_fit["callbacks"] = list(kw_fit.get("callbacks") or []) + [callback]

    try:
        self.model.fit_generator(
            generator=generator,
            epochs=epochs,
            steps_per_epoch=steps_per_epoch,
            **kw_fit
        )
    finally:
        # Restore the weights to the state before model fitting
        self.model.load_weights(weights_path)
        os.remove(weights_path)

        # Restore the original learning rate
        K.set_value(self.model.optimizer.lr, original_lr)
def on_batch_end(self, batch, logs):
    """Record lr/loss for this batch and raise the lr for the next one.

    Training is stopped when, after the first few batches, the loss is NaN
    or exceeds ``self.best_loss * self.cutoff_coef``.
    """
    # Log the learning rate
    lr = K.get_value(self.model.optimizer.lr)
    self.lrs.append(lr)

    # Log the loss
    loss = logs["loss"]
    self.losses.append(loss)

    # Check whether the loss got too large or NaN
    if batch > 5:
        if math.isnan(loss) or loss > self.best_loss * self.cutoff_coef:
            self.model.stop_training = True
            return

    if loss < self.best_loss:
        self.best_loss = loss

    # Increase the learning rate for the next batch
    K.set_value(self.model.optimizer.lr, lr * self.lr_mult)
def __init__(self, model, checkpoint_dir):
    """Create the saved-epoch variable and the read/write checkpoint managers.

    Args:
        model: Model to checkpoint; its ``distribute_strategy`` decides where
            this worker writes.
        checkpoint_dir: Base directory for the checkpoints.
    """
    self._model = model

    # The epoch at which the checkpoint is saved. Used for fault-tolerance.
    # GPU device only has int64 dtype registered VarHandleOp.
    unused_epoch = constant_op.constant(
        CKPT_SAVED_EPOCH_UNUSED_VALUE, dtype=dtypes.int64)
    self._ckpt_saved_epoch = variables.Variable(
        initial_value=unused_epoch, name='ckpt_saved_epoch')

    # Variable initialization.
    backend.set_value(self._ckpt_saved_epoch, CKPT_SAVED_EPOCH_UNUSED_VALUE)

    # _ckpt_saved_epoch gets tracked and is included in the checkpoint file
    # when backing up.
    checkpoint = trackable_util.Checkpoint(
        model=self._model, ckpt_saved_epoch=self._ckpt_saved_epoch)

    # If this is single-worker training, checkpoint_dir are the same for
    # write_checkpoint_manager and read_checkpoint_manager.
    #
    # If this is multi-worker training, and this worker should not
    # save checkpoint, we replace the write_checkpoint_manager's checkpoint_dir
    # with a temp filepath, so it writes to a file that will be removed at the
    # end of back_up() call. This is necessary because the SyncOnReadVariable
    # needs to be synced across all the workers in order to be read, and all
    # workers need to perform `save()`.
    # But all workers should restore from the same checkpoint_dir as passed in
    # read_checkpoint_manager.
    chief_dir = os.path.join(checkpoint_dir, 'chief')
    self.read_checkpoint_manager = checkpoint_management.CheckpointManager(
        checkpoint, directory=chief_dir, max_to_keep=1)

    write_checkpoint_dir = distributed_file_utils.write_dirpath(
        checkpoint_dir, self._model.distribute_strategy)
    if self._model.distribute_strategy.extended.should_checkpoint:
        self.write_checkpoint_manager = self.read_checkpoint_manager
    else:
        self.write_checkpoint_manager = checkpoint_management.CheckpointManager(
            checkpoint, directory=write_checkpoint_dir, max_to_keep=1)
def train(self):
    """Train and evaluate for ``self.epochs`` epochs, then save the weights.

    Each epoch runs ``train_step`` over the training set and ``test_step``
    over the test set, printing running loss/accuracy through the progress
    bar helpers.
    """
    # NOTE(review): the weights are saved once, after the last epoch; the
    # flattened original does not show whether saving was per epoch -
    # confirm against the upstream file.
    train_ds, test_ds = self.loadData()
    print("start training ....")

    train_template = 'Training Epoch: {} || learning rate: {} || Loss: {} || Accuracy: {}%'
    test_template = 'Testing Epoch: {} || Loss: {} || Accuracy: {}%'

    for epoch in range(self.epochs):
        train_processbar = ProcessBar()
        train_processbar.count = self.train_num
        for images, labels in train_ds:
            K.set_value(self.optimizer.lr, self.learning_rate)
            train_processbar.start_time = time.time()
            self.train_step(images, labels)
            train_message = train_template.format(
                epoch + 1,
                format(self.optimizer.lr.numpy(), '.5f'),
                format(self.train_loss.result(), '.2f'),
                format(self.train_accuracy.result() * 100, '.2f'))
            NoLinePrint(train_message, train_processbar)
        print("")

        test_processbar = ProcessBar()
        test_processbar.count = self.test_num
        for test_images, test_labels in test_ds:
            test_processbar.start_time = time.time()
            self.test_step(test_images, test_labels)
            test_message = test_template.format(
                epoch + 1,
                format(self.test_loss.result(), '.2f'),
                format(self.test_accuracy.result() * 100, '.2f'))
            NoLinePrint(test_message, test_processbar)
        print("")

    save_path = CreateSavePath("/home/jade/Models/" + self.dataset_name + "Classify/")
    self.model.summary()
    weights_file = save_path + self.dataset_name + '_vgg16net' + "_" + GetToday()
    self.model.save_weights(weights_file, save_format='tf')
def on_epoch_end(self, epoch, logs=None):
    """ReduceLROnPlateau logic preceded by a fixed number of ignored epochs.

    While ``self.delayed_phase`` is set, calls are only counted; the phase
    ends once ``self.delay`` calls have been seen. After that the usual
    plateau logic runs: the learning rate is multiplied by ``self.factor``
    (never going below ``self.min_lr``) after ``self.patience`` epochs
    without improvement of the monitored metric.
    """
    # Additional code for delay
    if self.delayed_phase:
        self.n_empty_calls += 1
        if self.n_empty_calls == self.delay:
            self.delayed_phase = False
        return

    # Original callback code
    logs = logs or {}
    logs['lr'] = K.get_value(self.model.optimizer.lr)
    current = logs.get(self.monitor)
    if current is None:
        # print() does not interpolate logging-style arguments, so the
        # message is formatted explicitly (it used to print the raw
        # template followed by the arguments).
        print('Reduce LR on plateau conditioned on metric `%s` '
              'which is not available. Available metrics are: %s' %
              (self.monitor, ','.join(list(logs.keys()))))
        return

    if self.in_cooldown():
        self.cooldown_counter -= 1
        self.wait = 0

    if self.monitor_op(current, self.best):
        self.best = current
        self.wait = 0
    elif not self.in_cooldown():
        self.wait += 1
        if self.wait >= self.patience:
            old_lr = float(K.get_value(self.model.optimizer.lr))
            if old_lr > self.min_lr:
                new_lr = max(old_lr * self.factor, self.min_lr)
                K.set_value(self.model.optimizer.lr, new_lr)
                if self.verbose > 0:
                    print('\nEpoch %05d: ReduceLROnPlateau reducing learning '
                          'rate to %s.' % (epoch + 1, new_lr))
                self.cooldown_counter = self.cooldown
                self.wait = 0
def bidirectional_model():
    """Build and compile the bidirectional-LSTM sequence model.

    The layer stack is: embedding initialised from ``word2vec``,
    ``parameters.rnn_layers`` bidirectional LSTM layers, the
    ``simple_context`` Lambda layer, a time-distributed dense projection
    over the vocabulary, and a softmax. The model is compiled with Adam and
    the learning rate is then set from ``parameters.learning_rate``.
    """
    length_vocab, embedding_size = word2vec.shape

    model = Sequential()
    embedding = Embedding(length_vocab,
                          embedding_size,
                          input_length=parameters.max_length,
                          weights=[word2vec],
                          mask_zero=True,
                          name='embedding_layer')
    model.add(embedding)

    for layer_index in range(parameters.rnn_layers):
        lstm = LSTM(parameters.rnn_size,
                    return_sequences=True,
                    name='bilstm_layer_%d' % (layer_index + 1))
        model.add(Bidirectional(lstm))

    context = Lambda(
        simple_context,
        mask=lambda inputs, mask: mask[:, parameters.max_len_desc:],
        output_shape=lambda input_shape:
        (input_shape[0], parameters.max_len_head, 2 *
         (parameters.rnn_size - parameters.activation_rnn_size)),
        name='simple_context_layer')
    model.add(context)

    vocab_size = word2vec.shape[0]
    model.add(TimeDistributed(Dense(vocab_size, name='time_distributed_layer')))
    model.add(Activation('softmax', name='activation_layer'))

    model.compile(loss='categorical_crossentropy', optimizer='adam')
    learning_rate = np.float32(parameters.learning_rate)
    K.set_value(model.optimizer.lr, learning_rate)
    print(model.summary())
    return model
def on_train_batch_end(self, batch, logs=None):
    """Recompute the learning rate every ``self.update_interval`` batches.

    For the first 100 batches the rate is ``self.initial_lr``. Afterwards it
    decays from ``initial_lr`` towards ``final_lr`` over ``total_step``
    batches, exponentially when ``self.decay_type == "exp"`` and linearly
    otherwise.

    Raises:
        ValueError: If the optimizer has no ``lr`` attribute.
    """
    if batch % self.update_interval != 0:
        return

    # NOTE(review): the check is on `lr`, while reads and writes below use
    # `learning_rate` - confirm both names exist on the optimizers in use.
    if not hasattr(self.model.optimizer, 'lr'):
        raise ValueError('Optimizer must have a "lr" attribute.')

    try:  # new API
        lr = float(K.get_value(self.model.optimizer.learning_rate))
        if batch < 100:
            lr = self.initial_lr
        else:
            if self.decay_type == "exp":
                lr_decay = (self.final_lr / self.initial_lr)**(
                    1. / (self.total_step - 1))
                lr = self.initial_lr * (lr_decay**batch)
            else:
                ratio = max((self.total_step - batch - 1.) /
                            (self.total_step - 1.), 0.)
                lr = self.final_lr + (self.initial_lr - self.final_lr) * ratio
        print(f'\n[UPDATE] Step {batch+1}, lr = {lr}')
    except TypeError as err:  # Support for old API for backward compatibility
        lr = self.initial_lr
        # Report the caught exception; the previous message printed the
        # TypeError class itself.
        print(f"There is a TypeError: {err}")
    K.set_value(self.model.optimizer.learning_rate, lr)
def on_epoch_end(self, epoch, logs=None):
    """Shift the loss weights ``alpha`` (down) and ``beta`` (up).

    Runs on every epoch except the last one
    (``epoch == self.total_epochs - 1``).
    """
    if epoch == self.total_epochs - 1:
        return

    print('\n-------------------------------CALL BACK RECEIVED ------------------------')
    print(f'\n---------------------------EPOCH IS {epoch} ------------------------------\n')

    # Per-epoch step sizes, scaled by the number of epochs completed.
    epochs_done = epoch + 1
    step_increase = (self.max_wt - self.beta) / self.total_epochs
    step_decrease = (self.alpha - self.least_wt) / self.total_epochs
    step_increase = step_increase * epochs_done
    step_decrease = step_decrease * epochs_done

    new_alpha = K.variable(K.get_value(self.alpha) - step_decrease)
    new_beta = K.variable(K.get_value(self.beta) + step_increase)

    K.set_value(self.alpha, K.get_value(new_alpha))
    K.set_value(self.beta, K.get_value(new_beta))
    logging.info("epoch %s, alpha = %s, beta = %s" %
                 (epoch, K.get_value(self.alpha), K.get_value(self.beta)))
    print('Done setting')
def create_model():
    """Build and compile the GRU sequence model.

    The layer stack is: embedding initialised from ``word2vec``,
    ``parameters.rnn_layers`` GRU layers, the ``simple_context`` Lambda
    layer, a time-distributed dense projection over the vocabulary, and a
    softmax. The model is compiled with Adam and the learning rate is then
    set from ``parameters.learning_rate``.
    """
    length_vocab, embedding_size = word2vec.shape
    print("shape of word2vec matrix ", word2vec.shape)

    model = Sequential()
    embedding = Embedding(length_vocab,
                          embedding_size,
                          input_length=parameters.max_length,
                          weights=[word2vec],
                          mask_zero=True,
                          name='embedding_layer')
    model.add(embedding)

    for layer_index in range(parameters.rnn_layers):
        model.add(
            GRU(parameters.rnn_size,
                return_sequences=True,
                name='gru_layer_%d' % (layer_index + 1)))

    context = Lambda(
        simple_context,
        mask=lambda inputs, mask: mask[:, parameters.max_len_desc:],
        output_shape=output_shape_simple_context_layer,
        name='simple_context_layer')
    model.add(context)

    vocab_size = word2vec.shape[0]
    model.add(TimeDistributed(Dense(vocab_size, name='time_distributed_layer')))
    model.add(Activation('softmax', name='activation_layer'))

    model.compile(loss='categorical_crossentropy', optimizer='adam')
    learning_rate = np.float32(parameters.learning_rate)
    K.set_value(model.optimizer.lr, learning_rate)
    print(model.summary())
    return model
def train(train_generator, model_train, iter_n, max_iter, val_generator=None, val_steps=None, logger=None):
    """Train `model_train` for one epoch and return its weights.

    The learning rate is first updated from the built-in step schedule, then
    a single epoch (``initial_epoch=iter_n``, ``epochs=iter_n + 1``) is run
    with ``fit_generator``.

    Args:
        train_generator: Training data generator.
        model_train: Compiled model to train.
        iter_n: Index of the current outer iteration (used as epoch index).
        max_iter: Total number of outer iterations, forwarded to
            ``update_learning_rate``.
        val_generator: Optional validation generator.
        val_steps: Validation steps, used only with `val_generator`.
        logger: Optional logger; learning-rate updates are reported through
            it when given.

    Returns:
        The model weights after training, from ``get_weights()``.
    """
    init_lr = config.learning_rate

    # Update learning rate
    schedule = {"step": {"80": 5e-5, "160": 1e-5, "200": 1e-6}}
    update_lr = update_learning_rate(schedule, init_lr, iter_n, max_iter)
    if update_lr != init_lr:
        init_lr = update_lr
        K.set_value(model_train.optimizer.lr, init_lr)
        # `logger` defaults to None; it used to be called unconditionally.
        if logger is not None:
            logger.info('=> updated learning rate: {}'.format(
                K.get_value(model_train.optimizer.lr)))

    fit_kwargs = dict(steps_per_epoch=config.iterations,
                      epochs=iter_n + 1,
                      initial_epoch=iter_n,
                      verbose=2,
                      workers=config.workers)
    if val_generator is not None:
        fit_kwargs.update(validation_data=val_generator,
                          validation_steps=val_steps)
    model_train.fit_generator(train_generator, **fit_kwargs)

    return model_train.get_weights()
def _byteps_average_metrics_in_place(self, logs):
    """Replace every metric in `logs` with its average across workers.

    Each metric is summed across workers with ``bps.push_pull`` and divided
    by the number of workers. In graph mode one variable and one reduce op
    are created per metric and reused on later calls.

    Args:
        logs: Metric dict, updated in place. A None value is treated as an
            empty dict.
    """
    import byteps.tensorflow as bps
    from tensorflow.python.eager import context

    logs = logs or {}
    reduced_logs = {}

    if self._allreduce_ranks <= 1.:
        self._allreduce_ranks = float(bps.size())

    # Reduce every metric among workers. Sort metrics by name
    # to ensure consistent order.
    for metric, value in sorted(logs.items()):
        if context.executing_eagerly():
            with tf.device(self._device):
                reduced_logs[metric] = bps.push_pull(
                    K.constant(value, name=metric),
                    op=bps.ops.ReduceOps.Sum).numpy()
        else:
            # Look the metric up in the dict it is stored in. The previous
            # check used `self.variables`, which this method never fills, so
            # the reuse branch below was not reached through it.
            if metric not in self._m_vars:
                with tf.name_scope('MetricAverageCallback') as scope:
                    var = tf.Variable(value, name=metric)
                    K.get_session().run(var.initializer)
                    self._m_vars[metric] = var
                    self._allreduce_ops[metric] = bps.push_pull(
                        var,
                        scope,
                        device_dense=self._device,
                        op=bps.ops.ReduceOps.Sum)
            else:
                K.set_value(self._m_vars[metric], value)
            reduced_logs[metric] = K.get_session().run(
                self._allreduce_ops[metric])

    # Override the reduced values back into logs dictionary
    # for other callbacks to use.
    for metric, value in reduced_logs.items():
        logs[metric] = value / self._allreduce_ranks
def schedule(optimizer, current_step):
    """Set ``optimizer.lr`` for `current_step`: linear warm-up, then steps.

    Below ``warmup_steps`` the rate is
    ``base_lr * current_step / warmup_steps``. Afterwards it is the entry of
    ``values`` paired with the first boundary that is not below the step, or
    ``values[-1]`` once every boundary has been passed.
    """
    step = max(1, current_step)
    if step < warmup_steps:
        K.set_value(optimizer.lr, K.get_value(base_lr * step / warmup_steps))
        return

    # Index of the first boundary the step has not passed; -1 selects the
    # final value when all boundaries are behind us.
    index = next(
        (i for i, bound in enumerate(boundaries) if step <= bound), -1)
    K.set_value(optimizer.lr, K.get_value(values[index]))
    return
def on_epoch_end(self, epoch, logs=None):
    """ReduceLROnPlateau with a log-spaced learning-rate warm-up.

    During the first ``self.warmup`` epochs the learning rate is the initial
    rate multiplied by a log-spaced factor that rises from the configured
    ``warmup_rate`` to 1. Afterwards the usual plateau logic runs: the rate
    is multiplied by ``self.factor`` (never going below ``self.min_lr``)
    after ``self.patience`` epochs without improvement.
    """
    logs = logs or {}
    logs['lr'] = K.get_value(self.model.optimizer.lr)
    current = logs.get(self.monitor)
    if current is None:
        logging.warning('Reduce LR on plateau conditioned on metric `%s` '
                        'which is not available. Available metrics are: %s',
                        self.monitor, ','.join(list(logs.keys())))
        return

    if epoch < self.warmup:
        if self.init_lr is None:
            # First warm-up epoch: remember the target rate and expand the
            # scalar warm-up rate into one factor per warm-up epoch.
            self.init_lr = float(K.get_value(self.model.optimizer.lr))
            self.warmup_rate = np.logspace(
                np.log10(self.warmup_rate), 0, self.warmup)
        K.set_value(self.model.optimizer.lr,
                    self.init_lr * self.warmup_rate[epoch])
        return
    if epoch == self.warmup and self.init_lr is not None:
        # Skipped when no warm-up epoch ever ran (e.g. warmup == 0):
        # init_lr is still None then and must not be written to the
        # optimizer.
        K.set_value(self.model.optimizer.lr, self.init_lr)

    if self.in_cooldown():
        self.cooldown_counter -= 1
        self.wait = 0

    if self.monitor_op(current, self.best):
        self.best = current
        self.wait = 0
    elif not self.in_cooldown():
        self.wait += 1
        if self.wait >= self.patience:
            old_lr = float(K.get_value(self.model.optimizer.lr))
            if old_lr > self.min_lr:
                new_lr = max(old_lr * self.factor, self.min_lr)
                K.set_value(self.model.optimizer.lr, new_lr)
                if self.verbose > 0:
                    print('\nEpoch %05d: ReduceLROnPlateau reducing learning '
                          'rate to %s.' % (epoch + 1, new_lr))
                self.cooldown_counter = self.cooldown
                self.wait = 0
def on_epoch_begin(self, epoch, logs=None):
    """Set ``self.alpha`` to ``2 / (1 + exp(-0.1 * epoch)) - 1``.

    The value is 0 at epoch 0 and approaches 1 as `epoch` grows.

    Args:
        epoch: Index of the epoch that is about to start.
        logs: Unused. Defaults to None instead of a shared mutable dict.
    """
    temp_lambda = 2 / (1 + np.exp(-0.1 * epoch)) - 1
    print(temp_lambda)
    K.set_value(self.alpha, temp_lambda)
def test_dynamic_loss_scaling(self, strategy_fn, cloning=True):
    """Check dynamic loss scaling through finite and NaN gradient steps.

    The gradient-check layer compares the scaled gradient against
    `expected_gradient`, which is adjusted by hand whenever the loss scale is
    expected to double or halve.
    """
    strategy = strategy_fn()
    initial_loss_scale = 2.
    batch_size = 4
    expected_gradient = backend.variable([initial_loss_scale / batch_size],
                                         dtype=dtypes.float16)
    # If this variable is set to True, the model below will have NaN gradients
    have_nan_gradients = backend.variable(False, dtype=dtypes.bool)

    with strategy.scope(), policy.policy_scope(
            policy.Policy('infer_float32_vars')):
        x = layers.Input(shape=(1,), batch_size=batch_size,
                         dtype=dtypes.float16)
        layer = AddLayer(assert_type=dtypes.float16)
        y = layer(x)
        identity_with_nan_grads = (
            mp_test_util.create_identity_with_nan_gradients_fn(
                have_nan_gradients))
        y = core.Lambda(identity_with_nan_grads)(y)
        identity_with_grad_check_fn = (
            mp_test_util.create_identity_with_grad_check_fn(
                expected_dtype=dtypes.float16,
                expected_gradient=expected_gradient))
        y = core.Lambda(identity_with_grad_check_fn)(y)
        y = math_ops.cast(y, dtypes.float32)
        model = models.Model(inputs=x, outputs=y)

        def loss_fn(y_true, y_pred):
            del y_true
            return math_ops.reduce_mean(y_pred)

        opt = gradient_descent.SGD(1.)
        loss_scale = loss_scale_module.DynamicLossScale(
            initial_loss_scale=initial_loss_scale, increment_period=2)
        opt = loss_scale_optimizer.LossScaleOptimizer(opt, loss_scale)
        model.compile(opt, loss=loss_fn, cloning=cloning)

    self.assertEqual(backend.eval(layer.v), 1)
    x = np.ones((batch_size, 1))
    y = np.ones((batch_size, 1))
    dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).batch(batch_size)

    def fit_and_expect(expected_value):
        # Train for one pass and check the layer variable afterwards.
        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), expected_value)

    # The variables starts with 1 and has a gradient of 1, so will go down by 1
    # each step.
    fit_and_expect(0)
    fit_and_expect(-1)

    # There have been two steps without NaNs, so the loss scale will double
    backend.set_value(expected_gradient,
                      backend.get_value(expected_gradient * 2))
    fit_and_expect(-2)

    # Next test with NaN gradients.
    backend.set_value(have_nan_gradients, True)
    # Variable should not be updated
    fit_and_expect(-2)

    # Test with finite gradients again
    backend.set_value(have_nan_gradients, False)
    # The loss scale will be halved due to the NaNs, so the gradient will also
    # be halved
    backend.set_value(expected_gradient,
                      backend.get_value(expected_gradient / 2))
    fit_and_expect(-3)
def reset_states(self):
    """Reset the ``true_positives`` state variable to zero."""
    counter = self.true_positives
    K.set_value(counter, 0)
def reset_states(self):
    """Set the accumulated ``true_positives`` value back to zero."""
    accumulator = self.true_positives
    K.set_value(accumulator, 0)
def reset_states(self, states=None):
    """Reset the recurrent states, to zeros or to the given values.

    Args:
        states: None to zero the states, or a single array / list of arrays
            with one entry per state.

    Raises:
        AttributeError: If the layer is not stateful.
        ValueError: If the state shape is not fully defined, or `states` has
            the wrong length or a wrongly shaped entry.
    """
    if not self.stateful:
        raise AttributeError('Layer must be stateful.')

    input_shape = self.input_spec[0].shape
    state_shape = self.compute_output_shape(input_shape)
    if self.return_state:
        state_shape = state_shape[0]
    if self.return_sequences:
        # Drop the second axis of the output shape.
        state_shape = state_shape[:1].concatenate(state_shape[2:])
    if None in state_shape:
        raise ValueError('If a RNN is stateful, it needs to know '
                         'its batch size. Specify the batch size '
                         'of your input tensors: \n'
                         '- If using a Sequential model, '
                         'specify the batch size by passing '
                         'a `batch_input_shape` '
                         'argument to your first layer.\n'
                         '- If using the functional API, specify '
                         'the time dimension by passing a '
                         '`batch_shape` argument to your Input layer.\n'
                         'The same thing goes for the number of rows and '
                         'columns.')

    # helper function
    def get_tuple_shape(nb_channels):
        """Return the state shape with its channel axis set to nb_channels."""
        dims = list(state_shape)
        data_format = self.cell.data_format
        if data_format == 'channels_first':
            dims[1] = nb_channels
        elif data_format == 'channels_last':
            dims[3] = nb_channels
        else:
            raise KeyError
        return tuple(dims)

    state_size = self.cell.state_size
    multiple_states = hasattr(state_size, '__len__')

    # initialize state if None
    if self.states[0] is None:
        if multiple_states:
            self.states = [K.zeros(get_tuple_shape(dim)) for dim in state_size]
        else:
            self.states = [K.zeros(get_tuple_shape(state_size))]
        return

    if states is None:
        if multiple_states:
            for state, dim in zip(self.states, state_size):
                K.set_value(state, np.zeros(get_tuple_shape(dim)))
        else:
            K.set_value(self.states[0],
                        np.zeros(get_tuple_shape(state_size)))
        return

    if not isinstance(states, (list, tuple)):
        states = [states]
    if len(states) != len(self.states):
        raise ValueError('Layer ' + self.name + ' expects ' +
                         str(len(self.states)) + ' states, ' +
                         'but it received ' + str(len(states)) +
                         ' state values. Input received: ' + str(states))
    for index, (value, state) in enumerate(zip(states, self.states)):
        dim = state_size[index] if multiple_states else state_size
        if value.shape != get_tuple_shape(dim):
            raise ValueError('State ' + str(index) +
                             ' is incompatible with layer ' +
                             self.name + ': expected shape=' +
                             str(get_tuple_shape(dim)) +
                             ', found shape=' + str(value.shape))
        # TODO(anjalisridhar): consider batch calls to `set_value`.
        K.set_value(state, value)