def test_validate_callbacks_predefined_callbacks(self):
    supported_predefined_callbacks = [
        callbacks.TensorBoard(),
        callbacks.CSVLogger(filename='./log.csv'),
        callbacks.EarlyStopping(),
        callbacks.ModelCheckpoint(filepath='./checkpoint'),
        callbacks.TerminateOnNaN(),
        callbacks.ProgbarLogger(),
        callbacks.History(),
        callbacks.RemoteMonitor()
    ]
    distributed_training_utils.validate_callbacks(
        supported_predefined_callbacks, adam.Adam())

    unsupported_predefined_callbacks = [
        callbacks.ReduceLROnPlateau(),
        callbacks.LearningRateScheduler(schedule=lambda epoch: 0.001)
    ]
    for callback in unsupported_predefined_callbacks:
        with self.assertRaisesRegex(ValueError,
                                    'You must specify a Keras Optimizer V2'):
            distributed_training_utils.validate_callbacks(
                [callback], v1_adam.AdamOptimizer())
def _create_csv_logger(artifact_dir: str) -> callbacks.CSVLogger:
    """Create a CSVLogger callback.

    Args:
        artifact_dir: str, path to artifact directory.

    Returns:
        CSVLogger, CSVLogger callback.
    """
    filename = os.path.join(artifact_dir, CSV_LOGGER_FILENAME)
    return callbacks.CSVLogger(filename=filename, separator=",", append=True)
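# A minimal usage sketch for the helper above. The artifact directory and the
# model/data names are illustrative assumptions; CSV_LOGGER_FILENAME is the
# module-level constant referenced in _create_csv_logger.
import os

artifact_dir = "/tmp/run_artifacts"  # any writable path works
os.makedirs(artifact_dir, exist_ok=True)

csv_logger = _create_csv_logger(artifact_dir)
# Because append=True, repeated runs add rows to the same CSV instead of
# overwriting it, so the log survives restarts:
# model.fit(x_train, y_train, epochs=5, callbacks=[csv_logger])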
def define_callbacks(output, batch_size):
    csv_logger = callbacks.CSVLogger(join(output, 'training.log'))
    earlystop = callbacks.EarlyStopping(monitor='val_loss', patience=2)
    tensorboard = callbacks.TensorBoard(batch_size=batch_size)
    fpath = join(
        output,
        'weights.{epoch:02d}-{loss:.2f}-{acc:.2f}-{val_loss:.2f}-{val_acc:.2f}.hdf5'
    )
    cp_cb = callbacks.ModelCheckpoint(filepath=fpath,
                                      monitor='val_loss',
                                      save_best_only=True)
    return [csv_logger, earlystop, tensorboard, cp_cb]
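# A hedged usage sketch wiring the returned list into a training call; the
# model/data names are assumptions. Two caveats: the {acc} and {val_acc}
# placeholders in fpath can only be filled if the model was compiled with
# metrics=['accuracy'], and TensorBoard(batch_size=...) is a standalone
# Keras / TF 1.x argument that was removed in TF 2.x.
cbs = define_callbacks(output='./runs', batch_size=32)
model.fit(x_train, y_train,
          validation_data=(x_val, y_val),
          epochs=20,
          batch_size=32,
          callbacks=cbs)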
def setUp(self):
    super(CallbackFallbackTest, self).setUp()
    self.batch_size = 5
    self.numpy_input = np.zeros((50, 10))
    self.numpy_target = np.ones(50)
    self.tensor_input = constant_op.constant(2.0, shape=(50, 10))
    self.tensor_target = array_ops.ones((50,))
    self.dataset_input = dataset_ops.DatasetV2.from_tensor_slices(
        (self.numpy_input, self.numpy_target)).shuffle(50).batch(
            self.batch_size)

    def generator():
        yield (np.zeros((self.batch_size, 10)), np.ones(self.batch_size))

    self.generator_input = generator()
    self.sequence_input = TestSequence(batch_size=self.batch_size,
                                       feature_shape=10)

    self.fallback_checkpoint_cb = cbks.ModelCheckpoint(
        self.get_temp_dir(), save_freq=10)
    self.normal_checkpoint_cb = cbks.ModelCheckpoint(
        self.get_temp_dir(), save_freq='epoch')
    self.fallback_tensorboard_cb = cbks.TensorBoard(update_freq=10)
    self.normal_tensorboard_cb = cbks.TensorBoard(update_freq='batch')
    self.unaffected_cb = cbks.CSVLogger(self.get_temp_dir())
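# TestSequence is referenced above but not defined in this excerpt. A minimal
# assumed stand-in, matching only the constructor signature used in setUp:
import numpy as np
from tensorflow.keras.utils import Sequence


class TestSequence(Sequence):
    """Assumed stub: yields fixed-size batches of zero features / one targets."""

    def __init__(self, batch_size, feature_shape):
        self.batch_size = batch_size
        self.feature_shape = feature_shape

    def __len__(self):
        return 10  # arbitrary number of batches per epoch

    def __getitem__(self, index):
        return (np.zeros((self.batch_size, self.feature_shape)),
                np.ones((self.batch_size,)))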
def build_callbacks(self, conf, callbacks_list):
    '''Set up logging and history callbacks.

    Based on Keras callbacks:
    https://github.com/fchollet/keras/blob/fbc9a18f0abc5784607cd4a2a3886558efa3f794/keras/callbacks.py

    Currently used callbacks include: BaseLogger, CSVLogger, EarlyStopping.
    Other possible callbacks to add in the future: RemoteMonitor,
    LearningRateScheduler.

    Argument list:
      - conf: There is a "callbacks" section in the conf.yaml file.
        Relevant parameters are:
        - list: parameter specifying additional callbacks, read in the
          driver script and passed as an argument of type list (see next arg)
        - metrics: list of quantities monitored during training and validation
        - mode: one of {auto, min, max}. The decision to overwrite the
          current save file is made based on either the maximization or the
          minimization of the monitored quantity. For val_acc this should be
          max, for val_loss this should be min, etc. In auto mode, the
          direction is automatically inferred from the name of the monitored
          quantity.
        - monitor: quantity used for early stopping; has to be from the
          list of metrics
        - patience: number of epochs used to decide whether to apply early
          stopping or continue training
      - callbacks_list: uses the callbacks.list configuration parameter,
        specifies the list of additional callbacks

    Returns: modified list of callbacks
    '''
    mode = conf['callbacks']['mode']
    monitor = conf['callbacks']['monitor']
    patience = conf['callbacks']['patience']
    csvlog_save_path = conf['paths']['csvlog_save_path']
    # CSV callback is on by default
    if not os.path.exists(csvlog_save_path):
        os.makedirs(csvlog_save_path)

    callbacks_list = conf['callbacks']['list']

    callbacks = [cbks.BaseLogger()]
    callbacks += [self.history]
    callbacks += [
        cbks.CSVLogger("{}callbacks-{}.log".format(
            csvlog_save_path,
            datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")))
    ]

    if "earlystop" in callbacks_list:
        callbacks += [
            cbks.EarlyStopping(patience=patience, monitor=monitor, mode=mode)
        ]
    if "lr_scheduler" in callbacks_list:
        pass

    return cbks.CallbackList(callbacks)
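# Illustrative conf contents, inferred only from the keys read in
# build_callbacks above; the actual conf.yaml values will differ.
conf = {
    'callbacks': {
        'list': ['earlystop'],    # additional callbacks to enable
        'metrics': ['val_loss'],  # quantities monitored during training
        'mode': 'min',            # min for val_loss, max for val_acc
        'monitor': 'val_loss',    # quantity used for early stopping
        'patience': 5,            # epochs to wait before stopping
    },
    'paths': {
        'csvlog_save_path': './csvlogs/',
    },
}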
def run(epochs,
        num_batches,
        batch_size=1,
        learning_rate=0.001,
        beta1=0.9,
        beta2=0.999,
        epsilon=1e-08,
        save_every=10,
        patience=5,
        baseline=2e-5,
        resume=False):
    # Destroy old graph
    K.clear_session()

    # Initialize batch generators
    batch_train = build_features.get_train_batches(batch_size=batch_size)
    batch_valid = build_features.get_valid_batches(batch_size=batch_size)

    # Create TensorFlow Iterator objects
    itr_train = build_features.make_iterator(batch_train)
    itr_valid = build_features.make_iterator(batch_valid)

    # Init callbacks
    cbs = list()

    # EarlyStopping callback: stops whenever loss doesn't improve
    # cbs.append(early_stopping.EarlyStopping(monitor='val_loss', mode='min',
    #                                         patience=patience, verbose=1,
    #                                         baseline=baseline))

    # ModelCheckpoint callback: saves the model every `save_every` epochs
    save_path = paths.checkpoints.regnet(
        rot=ROT, disp=DISP)  # ./checkpoints/regnet/train
    save_path.parent.mkdir(exist_ok=True, parents=True)
    if save_path.exists() and not resume:
        save_path.unlink()  # deletes file before training
    cbs.append(
        callbacks.ModelCheckpoint(str(save_path),
                                  save_best_only=True,
                                  period=save_every))

    # TensorBoard callback: saves logs for TensorBoard
    log_path = str(paths.logs.regnet())  # ./logs/regnet/train
    cbs.append(
        callbacks.TensorBoard(log_dir=log_path,
                              batch_size=batch_size,
                              write_graph=True))

    # CSVLogger callback: saves all losses to a CSV file
    cbs.append(
        callbacks.CSVLogger(str(save_path.with_suffix('.csv')),
                            append=True,
                            separator=','))

    # Create the network
    net = regnet.Regnet(learning_rate, beta1, beta2, epsilon)

    # Configure the model for training
    net.model.compile(optimizer=net.train_opt,
                      loss=net.model_loss,
                      metrics=net.metrics)

    # Load the pretrained ImageNet weights
    load_weights.imagenet_weights(net.model)

    if resume:
        net.model = keras.models.load_model(save_path,
                                            custom_objects=CUSTOM_LAYERS,
                                            compile=True)

    # Train network
    net.model.fit_generator(generator=itr_train,
                            validation_data=itr_valid,
                            validation_steps=batch_size,
                            epochs=epochs,
                            steps_per_epoch=num_batches,
                            callbacks=cbs,
                            verbose=1,
                            workers=0)
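# A possible entry point for the routine above; the hyperparameter values are
# illustrative, and ROT, DISP, and CUSTOM_LAYERS are module-level constants of
# the surrounding project.
if __name__ == '__main__':
    # Train for 50 epochs of 1000 batches each, checkpointing every 10 epochs.
    run(epochs=50, num_batches=1000, batch_size=4, save_every=10, resume=False)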
def run_training(configpath):
    params = config.Parameters(configpath)

    # Save the config for this training run, to be sure
    copyfile(configpath, params.folder_path_run + "config_backup_train.ini")

    datasetid = params.datasetid

    if params.estimator_mode == "vrad_kszgal":
        channels_in = 2
        channels_out = 1

    npad = params.npad
    print("padding:", npad)
    img_shape = (params.imgsizepix + 2 * npad, params.imgsizepix + 2 * npad,
                 channels_in)

    batch_size = params.batch_size
    epochs = params.epochs
    # Should/could match the TFRecord file; this defines how long an epoch is.
    num_train_examples = params.nsims_train
    num_valid_examples = params.nsims_valid
    nx = params.nx

    save_model_path = params.folder_path_run + 'model.ckpt'
    print("save as", save_model_path)
    # Whether or not to restore a previous training and continue from there
    restore_model = os.path.exists(save_model_path + ".index")

    ################### NETWORK
    estimatornet = networks.EstimatorNet(params)
    inputs, outputs = getattr(estimatornet, params.network)(img_shape,
                                                            channels_out)

    lossfunctions = losses.Lossfunctions(params)
    if params.loss_mode == "pixelMSE_unfiltered":
        lossfunc = lossfunctions.loss_pixelMSE_unfiltered
        lossfuncname = 'loss_pixelMSE_unfiltered'

    model = models.Model(inputs=[inputs], outputs=[outputs])

    if params.optimizer == 'Adam':
        # https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Adam
        optim = optimizers.Adam(lr=params.learning_rate)
    model.compile(optimizer=optim, loss=lossfunc)  # , metrics=['mean_squared_error']
    model.summary()

    cp = tf.keras.callbacks.ModelCheckpoint(filepath=save_model_path,
                                            monitor='val_loss',
                                            save_weights_only=True,
                                            save_best_only=True,
                                            verbose=1)
    # https://stackoverflow.com/questions/50127527/how-to-save-training-history-on-every-epoch-in-keras
    callback_csv = callbacks.CSVLogger(params.folder_path_run +
                                       'training_history_log.csv',
                                       append=True)

    # Check whether or not we want to load a previous model
    if restore_model:
        print("WE TRAIN FROM PREVIOUS CHECKPOINT.")
        print("loading weights from", save_model_path)
        model.load_weights(save_model_path)
    else:
        print("WE TRAIN FROM START.")

    ################### DATA SET
    dataset_train_raw = tf.data.TFRecordDataset(
        params.datapath + "datasets/dataset_train_" + str(datasetid) +
        ".tfrecords")
    dataset_valid_raw = tf.data.TFRecordDataset(
        params.datapath + "datasets/dataset_valid_" + str(datasetid) +
        ".tfrecords")

    dataset_train_parsed = dataset_train_raw.map(
        lambda x: trainingdata.tfrecord_parse_function(x, npad, params),
        num_parallel_calls=8)
    dataset_valid_parsed = dataset_valid_raw.map(
        lambda x: trainingdata.tfrecord_parse_function(x, npad, params),
        num_parallel_calls=8)

    # https://stackoverflow.com/questions/53514495/what-does-batch-repeat-and-shuffle-do-with-tensorflow-dataset
    dataset_train_parsed = dataset_train_parsed.shuffle(
        buffer_size=100,
        reshuffle_each_iteration=True).repeat().batch(batch_size)
    dataset_valid_parsed = dataset_valid_parsed.repeat().batch(batch_size)

    # Create iterators for the datasets with the above modifications.
    iterator_train = dataset_train_parsed.make_one_shot_iterator()
    iterator_valid = dataset_valid_parsed.make_one_shot_iterator()

    #################### TRAINING
    history = model.fit(
        iterator_train,
        steps_per_epoch=int(np.ceil(num_train_examples / float(batch_size))),
        epochs=epochs,
        validation_data=iterator_valid,
        validation_steps=int(np.ceil(num_valid_examples / float(batch_size))),
        verbose=2,
        callbacks=[cp, callback_csv])

    loss = history.history['loss']
    val_loss = history.history['val_loss']
    np.savez(params.folder_path_run + "loss", loss=loss, val_loss=val_loss)
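# np.savez appends the .npz extension automatically, so the curves saved above
# can be reloaded later, e.g. for plotting. A small sketch:
import numpy as np

data = np.load(params.folder_path_run + "loss.npz")
loss, val_loss = data["loss"], data["val_loss"]
print("best val_loss: %.6g at epoch %d" % (val_loss.min(), val_loss.argmin() + 1))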
# NOTE: the start of this snippet was truncated. `checkpoint` is clearly a
# ModelCheckpoint (it is passed to fit_generator below and the arguments match),
# but its original filepath is lost; the one used here is a placeholder.
checkpoint = callbacks.ModelCheckpoint(
    os.path.join(save_dir, 'weights.h5'),  # assumed filepath, original truncated
    monitor='val_predictions_categorical_accuracy',
    verbose=1,
    save_best_only=True,
    mode='auto',
    save_weights_only=True,
    period=1)
reduce_lr = callbacks.ReduceLROnPlateau(monitor='val_predictions_loss',
                                        factor=0.25,
                                        patience=10,
                                        verbose=1,
                                        mode='auto',
                                        min_delta=1e-6,
                                        cooldown=0,
                                        min_lr=0)
csv_logger = callbacks.CSVLogger(os.path.join(save_dir, 'Log_V1.log'),
                                 separator=',',
                                 append=False)

train_data_generator = Train_data_generator(batch_size)
valid_data_generator = Valid_data_generator(batch_size)

model.fit_generator(generator=train_data_generator,
                    steps_per_epoch=int(210030 / batch_size),
                    epochs=epochs,
                    verbose=1,
                    callbacks=[checkpoint, reduce_lr, csv_logger],
                    validation_data=valid_data_generator,
                    validation_steps=int(7530 / batch_size),
                    workers=1,
                    class_weight=class_weight,
                    use_multiprocessing=False,
                    shuffle=True)
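# Train_data_generator and Valid_data_generator are not defined in this
# excerpt. A loose sketch of the contract fit_generator expects from them,
# with entirely hypothetical shapes:
import numpy as np

def Train_data_generator(batch_size):
    """Assumed stub: loops forever, yielding (inputs, targets) batches.

    The real data loading and shapes are not shown; the feature width and the
    10-way one-hot targets below are placeholders.
    """
    while True:
        inputs = np.zeros((batch_size, 128))  # hypothetical feature shape
        targets = np.zeros((batch_size, 10))  # hypothetical one-hot labels
        yield inputs, targets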