def sql_net(opts):
    """
    A 1D convnet that uses a generator reading from a SQL database instead
    of loading all files into memory at once.
    """
    verbose = 1 if opts.verbose else 0
    db = sas_io.sql_connect(opts.database)
    counts = model_counts(db, tag=opts.train)
    encoder = OnehotEncoder(counts.keys())
    train_seq = sas_io.iread_sql(db, opts.train, encoder=encoder,
                                 batch_size=opts.batch)
    validation_seq = sas_io.iread_sql(db, opts.validation, encoder=encoder,
                                      batch_size=opts.batch)

    # Grab some training data so we can see how big it is.
    x, y = next(train_seq)

    tb = TensorBoard(log_dir=opts.tensorboard, histogram_freq=1)
    es = EarlyStopping(min_delta=0.001, patience=15, verbose=verbose)

    # Begin model definitions.
    nq = len(x[0])
    nlabels = len(counts)
    model = Sequential()
    model.add(Conv1D(nq, kernel_size=8, activation='relu',
                     input_shape=[nq, 1]))
    model.add(MaxPooling1D(pool_size=4))
    model.add(Dropout(.17676))
    model.add(Conv1D(nq // 2, kernel_size=6, activation='relu'))
    model.add(MaxPooling1D(pool_size=3))
    model.add(Dropout(.20782))
    model.add(Flatten())
    model.add(Dense(nq // 4, activation='tanh'))
    model.add(Dropout(.20582))
    # The output layer must match the number of categories (not nq // 4).
    model.add(Dense(nlabels, activation='softmax'))
    model.compile(loss="categorical_crossentropy",
                  optimizer=keras.optimizers.Adadelta(),
                  metrics=['accuracy'])

    # Model run.
    if verbose > 0:
        print(model.summary())
    history = model.fit_generator(train_seq,
                                  steps_per_epoch=opts.steps,
                                  epochs=opts.epochs,
                                  workers=1,
                                  verbose=verbose,
                                  validation_data=validation_seq,
                                  max_queue_size=1,
                                  callbacks=[tb, es])

    # Pull one batch from the validation sequence for a final evaluation
    # (the original referenced xval/yval, which were never defined here).
    xval, yval = next(validation_seq)
    score = model.evaluate(xval, yval, verbose=verbose)
    print('\nTest loss: ', score[0])
    print('Test accuracy:', score[1])
    save_output(save_path=opts.save_path, model=model, encoder=encoder,
                history=history, seed=None, score=score)
    logging.info("Complete.")
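# The streaming behaviour above comes from sas_io.iread_sql, which yields
# batches straight out of the database. As an illustration only (the real
# sas_io.iread_sql differs), a minimal sketch of such a generator, assuming
# a hypothetical sqlite table train_data(iq, model) and sqlite-style
# placeholders:
import numpy as np

def iread_sql_sketch(db, tag, encoder, batch_size):
    """Yield (x, y) batches drawn at random from the database forever."""
    cur = db.cursor()
    while True:
        cur.execute(
            "SELECT iq, model FROM train_data WHERE tag = ? "
            "ORDER BY RANDOM() LIMIT ?", (tag, batch_size))
        rows = cur.fetchall()
        # Add the trailing channel dimension that Conv1D expects.
        x = np.asarray([row[0] for row in rows])[..., np.newaxis]
        y = encoder([row[1] for row in rows])
        yield x, y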
val_dataset = data.get_val_set()

model_name = "catvsdog_4block_GAVG_BN_MC_CLB"
model = build_model(data.img_shape, data.num_classes)
model.compile(loss=categorical_crossentropy,
              optimizer=Adam(learning_rate=LEARNING_RATE),
              metrics=["accuracy"])
model.summary()

model_log_dir = os.path.join(LOGS_DIR, f"model_{model_name}")
tb_callback = TensorBoard(log_dir=model_log_dir,
                          histogram_freq=0,
                          profile_batch=0,
                          write_graph=False)
mc_callback = ModelCheckpoint(os.path.join(MODELS_DIR, model_name),
                              monitor='val_loss',
                              mode='auto',
                              save_best_only=True,
                              save_weights_only=False,
                              verbose=1)

# batch_size is omitted from fit(): Keras rejects it when the input is an
# already-batched dataset/generator, which the data pipeline provides here.
history = model.fit(train_dataset,
                    epochs=EPOCHS,
                    verbose=1,
                    validation_data=val_dataset,
                    callbacks=[tb_callback, mc_callback])
parser.add_argument("--height") parser.add_argument("--epochs") parser.add_argument("--dropout_rate") parser.add_argument("--learning_rate") parser.add_argument("--dataset_name") parser.add_argument("--train_input") parser.add_argument("--model_version") parser.add_argument("--model_dir") parser.add_argument("--model_fname") parser.add_argument("--labels") parser.add_argument("--tempfile", default=True) args = parser.parse_args() print(args) LOG_DIR = args.model_dir + '/logs' tensorboard_callback = TensorBoard(log_dir=LOG_DIR) original_dataset_dir = args.train_input height = int(args.height) width = int(args.width) batch_size = int(args.batch_size) dropout_rate = float(args.dropout_rate) epochs = int(args.epochs) model_dir = args.model_dir model_file = model_dir + args.model_version + "/" + args.model_fname learning_rate = float(args.learning_rate) train_df, val_df = read_input(args.train_input, args.dataset_name, args.labels) ##################################### # Train the model using EfficientNet.
model[j].add(Conv2D(128, kernel_size=4, activation='relu'))
model[j].add(BatchNormalization())
model[j].add(Flatten())
model[j].add(Dropout(0.4))
model[j].add(Dense(num_classes, activation='softmax'))
model[j].compile(optimizer="adam",
                 loss="sparse_categorical_crossentropy",
                 metrics=[
                     'accuracy',
                     'sparse_categorical_accuracy',
                     'mean_squared_error',
                     'mean_absolute_error',
                 ])

NAME = "{}-conv-{}-nodes-{}-dense{}".format(7, 48, 1, int(time.time()))
tensorboard[j] = TensorBoard(
    log_dir='{}\\{}______{}'.format(SAVE_PATH, NAME, j))
csv_logger[j] = CSVLogger('{}\\csvlogs\\{}__{}.csv'.format(SAVE_PATH, NAME, j),
                          separator=',',
                          append=False)
modelCheckpoint[j] = ModelCheckpoint('checkpoint.hdf5',
                                     monitor='val_loss',
                                     verbose=0,
                                     save_best_only=False,
                                     save_weights_only=False,
                                     mode='auto',
                                     period=1)
print('Model', j + 1, ':')
def main(args):
    annotation_file = args.annotation_file
    log_dir = 'logs/000/'
    classes_path = args.classes_path
    class_names = get_classes(classes_path)
    num_classes = len(class_names)
    anchors = get_anchors(args.anchors_path)
    num_anchors = len(anchors)

    # get freeze level according to CLI option
    if args.weights_path:
        freeze_level = 0
    else:
        freeze_level = 1
    if args.freeze_level is not None:
        freeze_level = args.freeze_level

    # callbacks for the training process
    logging = TensorBoard(log_dir=log_dir, histogram_freq=0,
                          write_graph=False, write_grads=False,
                          write_images=False, update_freq='batch')
    checkpoint = ModelCheckpoint(
        log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
        monitor='val_loss',
        verbose=1,
        save_weights_only=False,
        save_best_only=True,
        period=1)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5,
                                  verbose=1, cooldown=0, min_lr=1e-10)
    early_stopping = EarlyStopping(monitor='val_loss', min_delta=0,
                                   patience=50, verbose=1)
    terminate_on_nan = TerminateOnNaN()
    callbacks = [logging, checkpoint, reduce_lr, early_stopping,
                 terminate_on_nan]

    # get train & val dataset
    dataset = get_dataset(annotation_file)
    if args.val_annotation_file:
        val_dataset = get_dataset(args.val_annotation_file)
        num_train = len(dataset)
        num_val = len(val_dataset)
        dataset.extend(val_dataset)
    else:
        val_split = args.val_split
        num_val = int(len(dataset) * val_split)
        num_train = len(dataset) - num_val

    # get the model type & train/val data generator
    if num_anchors == 9:
        # YOLOv3 uses 9 anchors
        get_train_model = get_yolo3_train_model
        data_generator = yolo3_data_generator_wrapper
        tiny_version = False
    elif num_anchors == 6:
        # Tiny YOLOv3 uses 6 anchors
        get_train_model = get_yolo3_train_model
        data_generator = yolo3_data_generator_wrapper
        tiny_version = True
    elif num_anchors == 5:
        # YOLOv2 uses 5 anchors
        get_train_model = get_yolo2_train_model
        data_generator = yolo2_data_generator_wrapper
        tiny_version = False
    else:
        raise ValueError('Unsupported anchors number')

    # prepare online evaluation callback
    if args.eval_online:
        eval_callback = EvalCallBack(
            args.model_type,
            dataset[num_train:],
            anchors,
            class_names,
            args.model_image_size,
            args.model_pruning,
            log_dir,
            eval_epoch_interval=args.eval_epoch_interval,
            save_eval_checkpoint=args.save_eval_checkpoint)
        callbacks.append(eval_callback)

    # prepare train/val data shuffle callback
    if args.data_shuffle:
        shuffle_callback = DatasetShuffleCallBack(dataset)
        callbacks.append(shuffle_callback)

    # prepare model pruning config
    pruning_end_step = np.ceil(
        1.0 * num_train / args.batch_size).astype(np.int32) * args.total_epoch
    if args.model_pruning:
        pruning_callbacks = [
            sparsity.UpdatePruningStep(),
            sparsity.PruningSummaries(log_dir=log_dir, profile_batch=0)
        ]
        callbacks = callbacks + pruning_callbacks

    # prepare optimizer
    optimizer = get_optimizer(args.optimizer, args.learning_rate,
                              decay_type=None)

    # get train model
    model = get_train_model(args.model_type, anchors, num_classes,
                            weights_path=args.weights_path,
                            freeze_level=freeze_level,
                            optimizer=optimizer,
                            label_smoothing=args.label_smoothing,
                            model_pruning=args.model_pruning,
                            pruning_end_step=pruning_end_step)

    # support multi-gpu training
    template_model = None
    if args.gpu_num >= 2:
        # keep the template model for saving the result
        template_model = model
        model = multi_gpu_model(model, gpus=args.gpu_num)
        # recompile the multi-gpu model
        model.compile(optimizer=optimizer,
                      loss={'yolo_loss': lambda y_true, y_pred: y_pred})

    model.summary()

    # Transfer-train some epochs with frozen layers first, if needed,
    # to get a stable loss.
    input_shape = args.model_image_size
    assert (input_shape[0] % 32 == 0 and input_shape[1] % 32 == 0), \
        'Multiples of 32 required'

    initial_epoch = args.init_epoch
    epochs = initial_epoch + args.transfer_epoch
    print("Transfer training stage")
    print('Train on {} samples, val on {} samples, with batch size {}, '
          'input_shape {}.'.format(num_train, num_val, args.batch_size,
                                   input_shape))
    model.fit_generator(
        data_generator(dataset[:num_train], args.batch_size, input_shape,
                       anchors, num_classes),
        steps_per_epoch=max(1, num_train // args.batch_size),
        validation_data=data_generator(dataset[num_train:], args.batch_size,
                                       input_shape, anchors, num_classes),
        validation_steps=max(1, num_val // args.batch_size),
        epochs=epochs,
        initial_epoch=initial_epoch,
        workers=1,
        use_multiprocessing=False,
        max_queue_size=10,
        callbacks=callbacks)

    if args.decay_type:
        # rebuild the optimizer to apply learning rate decay, but only
        # after all layers are unfrozen
        callbacks.remove(reduce_lr)
        steps_per_epoch = max(1, num_train // args.batch_size)
        decay_steps = steps_per_epoch * (
            args.total_epoch - args.init_epoch - args.transfer_epoch)
        optimizer = get_optimizer(args.optimizer, args.learning_rate,
                                  decay_type=args.decay_type,
                                  decay_steps=decay_steps)

    # Unfreeze the whole network for further tuning.
    # NOTE: more GPU memory is required after unfreezing the body.
    print("Unfreeze and continue training, to fine-tune.")
    for i in range(len(model.layers)):
        model.layers[i].trainable = True
    # recompile to apply the change
    model.compile(optimizer=optimizer,
                  loss={'yolo_loss': lambda y_true, y_pred: y_pred})

    if args.multiscale:
        rescale_interval = args.rescale_interval
    else:
        rescale_interval = -1  # doesn't rescale

    print('Train on {} samples, val on {} samples, with batch size {}, '
          'input_shape {}.'.format(num_train, num_val, args.batch_size,
                                   input_shape))
    model.fit_generator(
        data_generator(dataset[:num_train], args.batch_size, input_shape,
                       anchors, num_classes, rescale_interval),
        steps_per_epoch=max(1, num_train // args.batch_size),
        validation_data=data_generator(dataset[num_train:], args.batch_size,
                                       input_shape, anchors, num_classes),
        validation_steps=max(1, num_val // args.batch_size),
        epochs=args.total_epoch,
        initial_epoch=epochs,
        workers=1,
        use_multiprocessing=False,
        max_queue_size=10,
        callbacks=callbacks)

    # Finally, store the model
    if args.model_pruning:
        if template_model is not None:
            template_model = sparsity.strip_pruning(template_model)
        else:
            model = sparsity.strip_pruning(model)

    if template_model is not None:
        template_model.save(log_dir + 'trained_final.h5')
    else:
        model.save(log_dir + 'trained_final.h5')
z = Add()([z_mu, z_eps])
x_pred = decoder(z)

vae = Model(inputs=[x, eps], outputs=x_pred)
# vae.compile(optimizer='rmsprop', loss=nll)
# sparse_categorical_crossentropy does not fit an x -> x reconstruction
# target; binary_crossentropy over the [0, 1] pixel values is used instead.
vae.compile(optimizer='adam', loss='binary_crossentropy')

# train the VAE on MNIST digits
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(-1, original_dim) / 255.
x_test = x_test.reshape(-1, original_dim) / 255.

# tensorboard --logdir=./logs --port 6006
tb = TensorBoard(r".\logs\MNIST-Papillon")
vae.fit(x_train, x_train,
        shuffle=True,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(x_test, x_test),
        callbacks=[tb])

encoder = Model(x, z_mu)

# display a 2D plot of the digit classes in the latent space
z_test = encoder.predict(x_test, batch_size=batch_size)
plt.figure(figsize=(6, 6))
plt.scatter(z_test[:, 0], z_test[:, 1], c=y_test)  # truncated call completed: latent points colored by digit class
model.add(BatchNormalization())

model.add(LSTM(128, activation="relu", input_shape=train_x.shape[1:]))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation="relu"))
model.add(Dropout(0.2))

model.add(Dense(2, activation="softmax"))

opt = tf.keras.optimizers.Adam(lr=0.001, decay=1e-6)
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=opt,
              metrics=['accuracy'])

tensorboard = TensorBoard(log_dir=f'logs/{NAME}')

filepath = "RNN_Final-{epoch:02d}-{val_acc:.3f}"
# In the original, the checkpoint options were accidentally passed to
# str.format(); they belong to the ModelCheckpoint call itself.
checkpoint = ModelCheckpoint("models/{}.model".format(filepath),
                             monitor='val_acc',
                             verbose=1,
                             save_best_only=True,
                             mode='max')

train_x = np.asarray(train_x)
train_y = np.asarray(train_y)
validation_x = np.asarray(validation_x)
validation_y = np.asarray(validation_y)

# A misplaced parenthesis left callbacks outside the fit() call in the
# original; it is passed to fit() here.
history = model.fit(train_x, train_y,
                    batch_size=BATCH_SIZE,
                    epochs=EPOCHS,
                    validation_data=(validation_x, validation_y),
                    callbacks=[tensorboard, checkpoint])
def fit_with_generator(
    self,
    train_data_raw,
    labels_raw,
    model_filepath,
    weights_filepath,
    logs_dir,
    training_log,
    resume,
):
    """Fit the training data to the network and save the network model
    as a HDF file.

    Arguments:
        train_data_raw {list} -- The HDF5 raw training data.
        labels_raw {list} -- The HDF5 raw training labels.
        model_filepath {string} -- The model file path.
        weights_filepath {string} -- The weights file path.
        logs_dir {string} -- The TensorBoard log file directory.
        training_log {string} -- The path to the log file of epoch results.
        resume {bool} -- True to continue with the previous training result
            or False to start a new one (default: {False}).

    Returns:
        tuple -- A tuple containing validation losses and validation
            accuracies.
    """
    initial_epoch = 0
    batch_size = self.hyperparameters.batch_size
    validation_split = self.hyperparameters.validation_split
    csv_logger = (CSVLogger(training_log) if not resume else
                  CSVLogger(training_log, append=True))
    checkpoint = ModelCheckpoint(
        filepath=weights_filepath,
        monitor=self.hyperparameters.monitor,
        verbose=1,
        save_best_only=False,
        save_weights_only=True,
    )
    tensorboard = TensorBoard(
        log_dir=logs_dir,
        histogram_freq=0,
        write_graph=True,
        write_images=True,
    )
    early_stopping = EarlyStopping(
        monitor=self.hyperparameters.monitor,
        min_delta=self.hyperparameters.es_min_delta,
        mode=self.hyperparameters.es_mode,
        patience=self.hyperparameters.es_patience,
        verbose=1)
    callbacks_list = [checkpoint, tensorboard, csv_logger, early_stopping]

    if not resume:
        optimizer_class = getattr(tf_optimizers,
                                  self.hyperparameters.optimizer)
        self.__model.compile(
            loss=self.hyperparameters.loss,
            optimizer=optimizer_class(
                learning_rate=self.hyperparameters.learning_rate),
            metrics=self.hyperparameters.metrics,
        )

    if resume:
        assert os.path.isfile(training_log), \
            "{} does not exist and is required by training resumption".format(
                training_log)
        with open(training_log) as training_log_file:
            initial_epoch += sum(1 for _ in training_log_file) - 1
        assert self.hyperparameters.epochs > initial_epoch, \
            ("The existing model has been trained for {0} epochs. "
             "Make sure the total epochs are larger than {0}").format(
                 initial_epoch)

    train_generator = self.__generator(train_data_raw, labels_raw,
                                       batch_size, validation_split,
                                       is_validation=False)
    test_generator = self.__generator(train_data_raw, labels_raw,
                                      batch_size, validation_split,
                                      is_validation=True)
    steps_per_epoch = math.ceil(
        float(train_data_raw.shape[0]) * (1 - validation_split) / batch_size)
    validation_steps = math.ceil(
        float(train_data_raw.shape[0]) * validation_split / batch_size)

    try:
        hist = self.__model.fit(
            train_generator,
            steps_per_epoch=steps_per_epoch,
            validation_data=test_generator,
            validation_steps=validation_steps,
            epochs=self.hyperparameters.epochs,
            shuffle=False,
            callbacks=callbacks_list,
            initial_epoch=initial_epoch,
        )
    except KeyboardInterrupt:
        Network.__LOGGER.warning("Training interrupted by the user")
        raise TerminalException("Training interrupted by the user")
    finally:
        self.__model.save(model_filepath)
        Network.__LOGGER.warning("Model saved to %s" % model_filepath)

    # Keras 1.x/2.x history keys differ only in the accuracy name.
    val_acc_key = ("val_acc" if int(tf.__version__.split(".")[0]) < 2
                   else "val_accuracy")
    return hist.history["val_loss"], hist.history[val_acc_key]
op = Nadam(lr=0.001)
batch_size = 32

model.compile(optimizer=op, loss="sparse_categorical_crossentropy",
              metrics=['acc'])

es = EarlyStopping(monitor='val_loss', patience=20,
                   restore_best_weights=True, verbose=1)
lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, verbose=1)
path = 'C:/nmb/nmb_data/h5/5s_last/lstm2_nadam_mms.h5'
mc = ModelCheckpoint(path, monitor='val_loss', verbose=1, save_best_only=True)
tb = TensorBoard(log_dir='C:/nmb/nmb_data/graph/' + 'lstm2_nadam_mms' + "/",
                 histogram_freq=0, write_graph=True, write_images=True)
history = model.fit(x_train, y_train, epochs=5000, batch_size=batch_size,
                    validation_split=0.2, callbacks=[es, lr, mc, tb])

# Evaluate, predict
model.load_weights('C:/nmb/nmb_data/h5/5s_last/lstm2_nadam_mms.h5')
result = model.evaluate(x_test, y_test, batch_size=batch_size)
print("loss : {:.5f}".format(result[0]))
print("acc : {:.5f}".format(result[1]), '\n')

############################################ PREDICT ####################################
def main(max_workers=3):
    from cellfinder.main import suppress_tf_logging

    suppress_tf_logging(tf_suppress_log_messages)

    from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint

    from cellfinder.tools.prep import prep_training
    from cellfinder.classify.tools import make_lists, get_model
    from cellfinder.classify.cube_generator import CubeGeneratorFromDisk

    start_time = datetime.now()
    args = training_parse()
    output_dir = Path(args.output_dir)
    ensure_directory_exists(output_dir)
    args = prep_training(args)

    yaml_contents = parse_yaml(args.yaml_file)
    tiff_files = get_tiff_files(yaml_contents)

    # Too many workers doesn't increase speed, and uses huge amounts of RAM
    workers = get_num_processes(
        min_free_cpu_cores=args.n_free_cpus, n_max_processes=max_workers
    )

    model = get_model(
        existing_model=args.trained_model,
        model_weights=args.model_weights,
        network_depth=models[args.network_depth],
        learning_rate=args.learning_rate,
        continue_training=args.continue_training,
    )

    signal_train, background_train, labels_train = make_lists(tiff_files)

    if args.test_fraction > 0:
        (
            signal_train,
            signal_test,
            background_train,
            background_test,
            labels_train,
            labels_test,
        ) = train_test_split(
            signal_train,
            background_train,
            labels_train,
            test_size=args.test_fraction,
        )
        validation_generator = CubeGeneratorFromDisk(
            signal_test,
            background_test,
            labels=labels_test,
            batch_size=args.batch_size,
            train=True,
        )
    else:
        validation_generator = None

    training_generator = CubeGeneratorFromDisk(
        signal_train,
        background_train,
        labels=labels_train,
        batch_size=args.batch_size,
        shuffle=True,
        train=True,
        augment=not args.no_augment,
    )

    callbacks = []

    if args.tensorboard:
        logdir = output_dir / "tensorboard"
        ensure_directory_exists(logdir)
        tensorboard = TensorBoard(
            log_dir=logdir,
            histogram_freq=0,
            write_graph=True,
            update_freq="epoch",
        )
        callbacks.append(tensorboard)

    if args.save_checkpoints:
        if args.save_weights:
            filepath = str(
                output_dir / "weights.{epoch:02d}-{val_loss:.3f}.h5"
            )
        else:
            filepath = str(output_dir / "model.{epoch:02d}-{val_loss:.3f}.h5")

        checkpoints = ModelCheckpoint(
            filepath, save_weights_only=args.save_weights
        )
        callbacks.append(checkpoints)

    model.fit(
        training_generator,
        validation_data=validation_generator,
        use_multiprocessing=True,
        workers=workers,
        epochs=args.epochs,
        callbacks=callbacks,
    )

    if args.save_weights:
        print("Saving model weights")
        model.save_weights(str(output_dir / "model_weights.h5"))
    else:
        print("Saving model")
        model.save(output_dir / "model.h5")

    # The original passed logging-style arguments to print(); format the
    # elapsed time explicitly instead.
    print("Finished training. Total time taken: %s"
          % (datetime.now() - start_time))
model = build_lstm(n_steps, n_feats)
model.summary()

# create these folders if they do not exist
if not os.path.isdir("results"):
    os.mkdir("results")
if not os.path.isdir("logs"):
    os.mkdir("logs")
if not os.path.isdir("data"):
    os.mkdir("data")

checkpointer = ModelCheckpoint(os.path.join("results", model_name + ".h5"),
                               save_weights_only=True,
                               save_best_only=True,
                               verbose=1)
tensorboard = TensorBoard(log_dir=os.path.join("logs", model_name))
early_stopping = EarlyStopping(patience=10)

hist = model.fit(x_train, y_train,
                 batch_size=128,
                 epochs=100,
                 validation_data=(x_val, y_val),
                 callbacks=[checkpointer, tensorboard, early_stopping],
                 verbose=1)

model.save(os.path.join("results", model_name) + ".h5")
results = model.evaluate(x_test, y_test, batch_size=128)

# save hist to history.pkl (the context manager also closes the file,
# which the original open() call never did)
with open('history.pkl', 'wb') as f:
    pickle.dump(hist.history, f)
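# A small usage sketch for reloading the pickled history later, e.g. to plot
# the curves (matplotlib assumed available; the key names are the ones
# model.fit records):
import pickle
import matplotlib.pyplot as plt

with open('history.pkl', 'rb') as f:
    saved_history = pickle.load(f)
plt.plot(saved_history['loss'], label='train loss')
plt.plot(saved_history['val_loss'], label='val loss')
plt.legend()
plt.show()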
def create_callbacks(args, logger, initial_epoch):
    if not args.resume:
        if args.checkpoint or args.history or args.tensorboard:
            if os.path.isdir(f'{args.result_path}/{args.task}/{args.stamp}'):
                flag = input(f'\n{args.task}/{args.stamp} is already saved. '
                             'Do you want a new stamp? (y/n) ')
                if flag == 'y':
                    args.stamp = create_stamp()
                    initial_epoch = 0
                    logger.info(f'New stamp {args.stamp} will be created.')
                elif flag == 'n':
                    return -1, initial_epoch
                else:
                    logger.info('You must select \'y\' or \'n\'.')
                    return -2, initial_epoch

            os.makedirs(f'{args.result_path}/{args.task}/{args.stamp}')
            yaml.dump(
                vars(args),
                open(f'{args.result_path}/{args.task}/{args.stamp}/model_desc.yml',
                     'w'),
                default_flow_style=False)
        else:
            logger.info(f'{args.stamp} is not created due to '
                        f'checkpoint - {args.checkpoint} | '
                        f'history - {args.history} | '
                        f'tensorboard - {args.tensorboard}')

    callbacks = [MomentumUpdate(logger, args.momentum, args.epochs)]

    if args.checkpoint:
        os.makedirs(f'{args.result_path}/{args.task}/{args.stamp}/checkpoint',
                    exist_ok=True)
        callbacks.append(
            ModelCheckpoint(
                filepath=os.path.join(
                    f'{args.result_path}/{args.task}/{args.stamp}/checkpoint',
                    '{epoch:04d}_{loss:.4f}_{loss_ij:.4f}_{loss_ji:.4f}'),
                monitor='loss',
                mode='min',
                verbose=1,
                save_weights_only=True))

    if args.history:
        os.makedirs(f'{args.result_path}/{args.task}/{args.stamp}/history',
                    exist_ok=True)
        callbacks.append(
            CustomCSVLogger(
                filename=f'{args.result_path}/{args.task}/{args.stamp}/history/epoch.csv',
                separator=',',
                append=True))

    if args.tensorboard:
        callbacks.append(
            TensorBoard(
                log_dir=f'{args.result_path}/{args.task}/{args.stamp}/logs',
                histogram_freq=args.tb_histogram,
                write_graph=True,
                write_images=True,
                update_freq=args.tb_interval,
                profile_batch=100,
            ))

    return callbacks, initial_epoch
X = pickle.load(open("X.pickle", "rb")) y = pickle.load(open("y.pickle", "rb")) X = X / 255.0 dense_layers = [0] layer_sizes = [64] conv_layers = [3] for dense_layer in dense_layers: for layer_size in layer_sizes: for conv_layer in conv_layers: NAME = f"{conv_layer}-conv-{layer_size}-nodes-{dense_layer}-dense-{int(time.time())}" print(NAME) tensorboard = TensorBoard(log_dir=f"logs\{NAME}") model = Sequential() model.add(Conv2D(layer_size, (3, 3), input_shape=X.shape[1:])) model.add(Activation("relu")) model.add(MaxPooling2D(pool_size=(2, 2))) for l in range(conv_layer - 1): model.add(Conv2D(layer_size, (3, 3), input_shape=X.shape[1:])) model.add(Activation("relu")) model.add(MaxPooling2D(pool_size=(2, 2))) model.add(Flatten()) for l in range(dense_layer): model.add(Dense(512))
def oned_convnet(opts, x, y, test=None, seed=235):
    """
    Runs a 1D convolutional classification neural net on the input data
    x and y.

    :param opts: Parsed command-line options (verbosity, batch size, steps,
        epochs, validation split, TensorBoard log dir, save path, resume).
    :param x: List of training data x.
    :param y: List of corresponding categories for each vector in x.
    :param test: Optional (data, labels) tuple of evaluation data.
    :param seed: Random seed. Defaults to 235 for reproducibility purposes,
        but should be set randomly in an actual run.
    :return: None.
    """
    verbose = 1 if opts.verbose else 0

    # 1-hot encoding.
    categories = sorted(set(y))
    encoder = OnehotEncoder(categories)

    # Split data into train and validation.
    test_size = float(opts.validation) / 100
    xtrain, xval, ytrain, yval = train_test_split(x, encoder(y),
                                                  test_size=test_size,
                                                  random_state=seed)
    # We need to poke an extra dimension into our input data for some reason.
    xtrain, xval = fix_dims(xtrain, xval)
    nq, nlabels = x.shape[1], len(categories)

    # Check that the validation data covers all the categories
    #if categories != sorted(set(ytrain)):
    #    raise ValueError("Training data is missing categories.")
    #if categories != sorted(set(yval)):
    #    raise ValueError("Test data is missing categories.")

    tb = TensorBoard(log_dir=opts.tensorboard, histogram_freq=1)
    #es = EarlyStopping(min_delta=0.005, patience=5, verbose=verbose)
    basename = inepath(opts.save_path)
    checkpoint = ModelCheckpoint(
        filepath=basename + "-check.h5",  # or "-check{epoch:03d}.h5",
        ## To keep best loss, and not overwrite every epoch.
        #monitor='loss',
        save_best_only=True,
        mode='auto',
    )

    if opts.resume:
        model = reload_net(inepath(opts.save_path) + '.h5')
    else:
        # Begin model definitions
        model = Sequential()
        #model.add(Embedding(4000, 128, input_length=x.shape[1]))
        model.add(InputLayer(input_shape=(nq, 1)))
        model.add(Conv1D(nq, kernel_size=6, activation='relu'))
        model.add(MaxPooling1D(pool_size=4))
        model.add(Dropout(.17676))
        model.add(Conv1D(nq // 2, kernel_size=6, activation='relu'))
        model.add(MaxPooling1D(pool_size=4))
        model.add(Dropout(.20782))
        model.add(Flatten())
        model.add(Dense(nq // 4, activation='tanh'))
        model.add(Dropout(.20582))
        model.add(Dense(nlabels, activation='softmax'))

        loss = ('binary_crossentropy' if nlabels == 2
                else 'categorical_crossentropy')
        model.compile(loss=loss,
                      optimizer=keras.optimizers.Adadelta(),
                      metrics=['accuracy'])

    if verbose > 0:
        print(model.summary())

    # Model Run
    history = model.fit(
        xtrain, ytrain,
        batch_size=opts.batch,
        steps_per_epoch=opts.steps,
        epochs=opts.epochs,
        verbose=verbose,
        validation_data=(xval, yval),
        #callbacks=[tb, es, checkpoint],
        callbacks=[tb, checkpoint],
    )

    # Check the results against the held-out test data.
    score = None
    if test is not None:
        if categories != sorted(set(test[1])):
            raise ValueError("Validation data has missing categories.")
        score = model.evaluate(test[0], encoder(test[1]), verbose=verbose)
        print('\nTest loss: ', score[0])
        print('Test accuracy:', score[1])

    save_output(save_path=opts.save_path, model=model, encoder=encoder,
                history=history, seed=seed, score=score)
    logging.info("Complete.")
# HYPERPARAMETERS AND DESIGN CHOICES
num_neurons = 128
batch_size = 64
ACTIV_FN = "relu"
activation_fn = cnn.get_activ_fn(ACTIV_FN)
num_epochs = 50
learn_rate = 0.001
drop_prob = 0.1
optim = "Adam"

# callbacks for saving weights and TensorBoard;
# a new directory is created for each run using a timestamp
folder = os.path.join(os.getcwd(),
                      datetime.now().strftime("%d-%m-%Y_%H-%M-%S"),
                      str(ACTIV_FN))
tb_callback = TensorBoard(log_dir=folder)

# Build, train, and test model
model = cnn.build_model(input_shape, activation_fn, learn_rate, drop_prob,
                        num_neurons, num_classes)
train_accuracy, train_loss, valid_accuracy, valid_loss = cnn.train_model(
    model, train_images, train_labels, batch_size, num_epochs,
    valid_images, valid_labels, tb_callback)
test_accuracy, test_loss, predictions = cnn.test_model(model, test_images,
                                                       test_labels)

# # save test set results to csv
# predictions = np.round(predictions)
# predictions = predictions.astype(int)
# df = pd.DataFrame(predictions)
# df.to_csv("mnist.csv", header=None, index=None)
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(2, activation='softmax'))

opt = tf.keras.optimizers.Adam(lr=0.001, decay=1e-6)

# Compile model
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy']
)

tensorboard = TensorBoard(log_dir="logs/{}".format(NAME))

# unique file name that will include the epoch and the validation acc for that epoch
filepath = "RNN_Final-{epoch:02d}-{val_acc:.3f}"
# The checkpoint options were accidentally passed to str.format() in the
# original; they belong to ModelCheckpoint. Saves only the best ones.
checkpoint = ModelCheckpoint(
    "C:/Users/ptandy/Desktop/models/{}.model".format(filepath),
    monitor='val_acc',
    verbose=1,
    save_best_only=True,
    mode='max')

# Train model
history = model.fit(
    train_x, train_y,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(validation_x, validation_y),
    callbacks=[tensorboard, checkpoint],
)

# Score model
score = model.evaluate(validation_x, validation_y, verbose=0)
def train(config):
    input_width = config['model']['input_width']
    input_height = config['model']['input_height']
    label_file = config['model']['labels']
    model_name = config['model']['name']
    class_num = config['model']['class_num']

    train_data_dir = config['train']['data_dir']
    train_file_list = config['train']['file_list']
    pretrained_weights = config['train']['pretrained_weights']
    batch_size = config['train']['batch_size']
    learning_rate = config['train']['learning_rate']
    nb_epochs = config['train']['nb_epochs']
    start_epoch = config['train']['start_epoch']
    train_base = config['train']['train_base']

    valid_data_dir = config['valid']['data_dir']
    valid_file_list = config['valid']['file_list']

    builder = ModelBuilder(config)

    filepath = train_file_list
    train_gen = builder.build_datagen(filepath)
    train_gen.save_labels(label_file)
    trainDataGen, train_steps_per_epoch = train_gen.from_frame(
        directory=train_data_dir)
    trainDs = tf.data.Dataset.from_generator(
        lambda: trainDataGen,
        output_types=(tf.float32, tf.float32),
        output_shapes=([batch_size, input_width, input_height, 3],
                       [batch_size, class_num])
    )
    options = tf.data.Options()
    options.experimental_distribute.auto_shard_policy = \
        tf.data.experimental.AutoShardPolicy.DATA
    trainDs = trainDs.with_options(options)

    filepath = valid_file_list
    valid_gen = builder.build_datagen(filepath, with_aug=False)
    validDataGen, valid_steps_per_epoch = valid_gen.from_frame(
        directory=valid_data_dir)
    validDs = tf.data.Dataset.from_generator(
        lambda: validDataGen,
        output_types=(tf.float32, tf.float32),
        output_shapes=([batch_size, input_width, input_height, 3],
                       [batch_size, class_num])
    )
    options = tf.data.Options()
    options.experimental_distribute.auto_shard_policy = \
        tf.data.experimental.AutoShardPolicy.DATA
    validDs = validDs.with_options(options)

    # define checkpoint
    dataset_name = model_name
    dirname = 'ckpt-' + dataset_name
    if not os.path.exists(dirname):
        os.makedirs(dirname)

    timestr = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    filepath = os.path.join(
        dirname,
        'weights-%s-%s-{epoch:02d}-{val_accuracy:.2f}.hdf5'
        % (model_name, timestr))
    checkpoint = ModelCheckpoint(filepath=filepath,
                                 monitor='val_accuracy',  # acc outperforms loss
                                 verbose=1,
                                 save_best_only=True,
                                 save_weights_only=True,
                                 period=5)

    # define logs for tensorboard
    tensorboard = TensorBoard(log_dir='logs', histogram_freq=0)

    wgtdir = 'weights'
    if not os.path.exists(wgtdir):
        os.makedirs(wgtdir)

    # train
    # tf2.5
    strategy = tf.distribute.MirroredStrategy()
    print("Number of devices: {}".format(strategy.num_replicas_in_sync))

    # Open a strategy scope.
    with strategy.scope():
        model = builder.build_model()
        # tf2.5
        if class_num == 2:
            model.compile(
                optimizer=tf.optimizers.Adam(learning_rate=learning_rate),
                loss='categorical_crossentropy',
                metrics=['accuracy'])
        else:
            model.compile(
                optimizer=tf.optimizers.Adam(learning_rate=learning_rate),
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])
        model.summary()

        # Load weights of an unfinished training model (optional)
        if pretrained_weights != '':
            model.load_weights(pretrained_weights)

    # batch_size is omitted from fit(): the generators already batch the
    # data, and Keras rejects batch_size together with a tf.data.Dataset.
    model.fit(trainDs,
              steps_per_epoch=train_steps_per_epoch,
              validation_data=validDs,
              validation_steps=valid_steps_per_epoch,
              initial_epoch=start_epoch,
              epochs=nb_epochs,
              callbacks=[checkpoint, tensorboard],
              use_multiprocessing=True,
              workers=16)

    model_file = '%s_%s.h5' % (model_name, timestr)
    model.save(model_file)
    print('save model to %s' % model_file)
    # Dense(0) is not a valid layer; the final layer must match the number
    # of classes (10 here), so the hp.Choice over [0, 10] is replaced.
    model.add(Dense(10))
    model.add(Activation("softmax"))

    #lr = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        # optimizer=keras.optimizers.Adam(
        #     hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"])
    return model


import datetime
import os

LOG_DIR = f"{os.getcwd()}/logs/{datetime.datetime.now().strftime('%m.%d-%H.%M')}"

tensorboard = TensorBoard(log_dir=LOG_DIR)
showtime = ShowTime()
my_callbacks = [tensorboard, showtime]

# tuner = RandomSearch(
#     build_model,
#     objective="val_accuracy",
#     max_trials=1,
#     executions_per_trial=1,
#     directory=LOG_DIR
# )

tuner = Hyperband(build_model,
                  objective="val_accuracy",
                  max_epochs=2,
                  directory=LOG_DIR)
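# The tuner above is constructed but the search step is not shown; a minimal
# sketch of running it, assuming (x_train, y_train) and (x_test, y_test)
# arrays are in scope. tuner.search accepts the same arguments as model.fit,
# so the TensorBoard callback list defined above can be passed straight in:
tuner.search(x_train, y_train,
             epochs=2,
             validation_data=(x_test, y_test),
             callbacks=my_callbacks)
best_model = tuner.get_best_models(num_models=1)[0]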
data_path = params['data']['data_path']
log_path = params['data']['log_path']
model_path = params['data']['model_path']
optuna_args_dict = params['optuna']
hps_dict = params['hparams']
data_args_list = params['data']

# SQLite connectivity
import sqlite3
con = sqlite3.connect(optuna_args_dict['name'] + '_Optuna.db')

callbacks_list = []

log_dir = "logs\\" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)
callbacks_list.append(tensorboard_callback)

w_fn = 'models\\mnist-1-{}.h5'.format(
    datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1,
                               mode='auto')
callbacks_list.append(early_stopping)

# Change the cooldown to 1 if it behaves unexpectedly
learning_rate_reduction = ReduceLROnPlateau(monitor='val_accuracy',
                                            patience=3,
                                            verbose=1,
                                            factor=0.5,
                                            min_lr=2.5e-5,
                                            cooldown=0)
callbacks_list.append(learning_rate_reduction)

model_checkpoint = ModelCheckpoint(w_fn, monitor='val_accuracy',
                                   save_best_only=True)
callbacks_list.append(model_checkpoint)


class ExperimentManager(object):
    def __init__(self, model, optuna_args_dict, hps_dict, data_args_list,
                 callbacks_list):
        self.optuna_args_dict = optuna_args_dict
def train(self, z_noise=None):
    start_time = time.time()

    # log writer
    logdir = self.log_dir + '/' + self.model_dir
    tensorboard_callback = TensorBoard(log_dir=logdir)
    tensorboard_callback.set_model(self.gan)
    scalar_names = ['d_loss', 'g_loss']

    for epoch in range(self.start_epoch, self.epoch):
        # get batch data
        for idx in range(self.start_batch_id, self.iteration):
            # train generator
            noise = np.random.normal(0, 1, (self.batch_size, self.z_dim))
            y_gen = np.ones(self.batch_size)
            g_loss = self.gan.train_on_batch(noise, y_gen)

            # train discriminator
            half_batch = self.batch_size // 2
            real_x, _ = next(self.train_generator)
            while real_x.shape[0] != half_batch:
                real_x, _ = next(self.train_generator)
            noise = np.random.normal(0, 1, (half_batch, self.z_dim))
            fake_x = self.generator.predict(noise)
            real_y = np.ones(half_batch)
            real_y[:] = 0.9  # one-sided label smoothing for the real labels
            fake_y = np.zeros(half_batch)
            d_loss_real = self.discriminator.train_on_batch(real_x, real_y)
            d_loss_fake = self.discriminator.train_on_batch(fake_x, fake_y)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # write summary
            # self.write_log(tensorboard_callback, scalar_names,
            #                [d_loss, g_loss], self.counter)

            # display training status
            self.counter += 1
            print("Epoch: [%2d] [%5d/%5d] time: %4.4f, d_loss: %.8f, g_loss: %.8f"
                  % (epoch, idx, self.iteration,
                     time.time() - start_time, d_loss, g_loss))

            # save training results every n steps
            if type(z_noise) is np.ndarray:
                sample_num = z_noise.shape[0]
            else:
                sample_num = self.sample_num
                z_noise = np.random.normal(0, 1, (sample_num, self.z_dim))
            if np.mod(idx + 1, self.print_freq) == 0:
                sample_imgs = self.generator.predict(z_noise)
                manifold = int(np.ceil(np.sqrt(sample_num)))
                save_images_plt(
                    sample_imgs, [manifold, manifold],
                    f'{self.sample_dir}/{self.model_name}_train_{epoch:02d}_{idx+1:05d}',
                    mode='sample')
            if np.mod(idx + 1, self.save_freq) == 0:
                self.save(self.checkpoint_dir, self.counter)

        # After an epoch, start_batch_id is set to zero;
        # a non-zero value is only used for the first epoch after
        # loading a pre-trained model.
        self.start_batch_id = 0

        # save model
        self.save(self.checkpoint_dir, self.counter)

    # save model for the final step
    self.save(self.checkpoint_dir, self.counter)
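# The commented-out self.write_log(...) above refers to a helper that pushes
# scalars through the TensorBoard callback by hand. A common sketch for the
# TF1-era Keras callback, which exposed a `writer` attribute (an assumption
# about this codebase; under TF2 one would use tf.summary.create_file_writer
# and tf.summary.scalar instead):
import tensorflow as tf

def write_log(callback, names, values, batch_no):
    """Write one scalar summary per (name, value) pair at step batch_no."""
    for name, value in zip(names, values):
        summary = tf.Summary()
        summary_value = summary.value.add()
        summary_value.simple_value = value
        summary_value.tag = name
        callback.writer.add_summary(summary, batch_no)
        callback.writer.flush()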
def train_encoder(self):
    self.auto_encoder.compile(optimizer='adam', loss='binary_crossentropy')
    self.auto_encoder.fit(
        self.train_X, self.train_X,
        epochs=800,
        batch_size=8192,
        shuffle=True,
        validation_data=(self.test_X, self.test_X),
        callbacks=[TensorBoard(log_dir='/tmp/autoencoder')]
    )
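# With the callback above writing to /tmp/autoencoder, the training curves
# can be inspected while the fit runs by pointing the TensorBoard CLI at the
# same directory:
#
#   tensorboard --logdir=/tmp/autoencoder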
)
model.summary()
# model.trainable = False
model.save('C:/nmb/nmb_data/h5/5s/EfficientNet/efficientnet_sgd_1.h5')

# Compile, train
op = SGD(lr=1e-3)
batch_size = 4

es = EarlyStopping(monitor='val_loss', patience=20,
                   restore_best_weights=True, verbose=1)
lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, verbose=1)
path = 'C:/nmb/nmb_data/h5/5s/EfficientNet/efficientnet_sgd_1.h5'
mc = ModelCheckpoint(path, monitor='val_loss', verbose=1, save_best_only=True)
tb = TensorBoard(log_dir='C:/study/graph/'
                 + start_now.strftime("%Y%m%d-%H%M%S") + "/",
                 histogram_freq=0, write_graph=True, write_images=True)

model.compile(optimizer=op, loss="sparse_categorical_crossentropy",
              metrics=['acc'])
history = model.fit(x_train, y_train, epochs=1000, batch_size=batch_size,
                    validation_split=0.2, callbacks=[es, lr, mc, tb])

# Evaluate, predict
model = load_model('C:/nmb/nmb_data/h5/5s/EfficientNet/efficientnet_sgd_1.h5')
# model.load_weights('C:/nmb/nmb_data/h5/5s/EfficientNet/efficientnet_sgd_1.h5')
result = model.evaluate(x_test, y_test, batch_size=8)
print("loss : {:.5f}".format(result[0]))
print("acc : {:.5f}".format(result[1]))

############################################ PREDICT ####################################
pred = ['C:/nmb/nmb_data/predict_04_26/F', 'C:/nmb/nmb_data/predict_04_26/M']
    loss=tf.keras.losses.categorical_crossentropy,
    metrics=[tf.keras.metrics.categorical_accuracy])

################ Save the model and visualize ###################
checkpoint_save_path = r"./model/VGG16-Header-model/VGG16-Header-model.ckpt"
if os.path.exists(checkpoint_save_path + '.index'):
    print('-------------load the model-----------------')
    model.load_weights(checkpoint_save_path)
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_save_path,
                                                 save_weights_only=True,
                                                 save_best_only=True)
tensorBoard_callback = TensorBoard(
    histogram_freq=1,
    write_graph=True,
    write_images=True,
    update_freq='epoch',
    embeddings_freq=0,
)
# Setting a custom log directory here could make the program crash
# tensorboard --logdir=E:\pycharm\tensorflow-learn\paper\Intra-Domain_Transfer_Learning_and_Stacked_Generalization\logs

################ Start training #####################
history = model.fit(x_train, y_train,
                    batch_size=64,
                    epochs=10,
                    validation_split=0.1,
                    callbacks=[cp_callback, tensorBoard_callback])
model.summary()
model.summary()

# Compile, train
model.compile(loss='mse', optimizer='adam', metrics=['mse'])

es = EarlyStopping(monitor='val_loss', patience=7, mode='auto')
cp = ModelCheckpoint(
    filepath='./model/keras60_{epoch:02d}_{val_loss:.4f}.hdf5',
    monitor='val_loss', save_best_only=True, mode='auto')
# note: despite the name, `hist` here is the TensorBoard callback
hist = TensorBoard(log_dir='graph', histogram_freq=0,
                   write_graph=True, write_images=True)

model.fit(x_train, y_train, epochs=1000, batch_size=32, verbose=1,
          validation_split=0.5, callbacks=[es, cp, hist])

# Evaluate, predict
loss, mse = model.evaluate(x_test, y_test, batch_size=32)
print("loss : ", loss)
print("mse : ", mse)
] = Utilities.loadImagesAndCategories(images, imgsDir, categories,
                                      normalizedDataPath, 2,
                                      inputWidth, inputHeight)

model = cnn.create_model(inputWidth, inputHeight, 1, outputNo)

# change to cnn input format
df_im = np.asarray(images)
df_im = df_im.reshape(df_im.shape[0], inputWidth, inputHeight, 1)
df_cat = np.asarray(categories)
df_cat = df_cat.reshape(df_cat.shape[0], outputNo)

tr_im, val_im, tr_cat, val_cat = train_test_split(df_im, df_cat,
                                                  test_size=0.2)

# "\\{}" is the properly escaped form of the stray "\{}" in the original
tensorboard = TensorBoard(
    log_dir=imgsDir + "logs_img1" + "\\{}".format(time()))
model_name = "model_phase02.h5"
callbacks = [
    EarlyStopping(monitor='val_accuracy', mode='max', patience=50,
                  verbose=1),
    keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', mode='max',
                                      factor=0.5, patience=15,
                                      min_lr=0.000001, verbose=1),
    ModelCheckpoint(model_name, monitor='val_accuracy',
model.summary()
model.save('C:/nmb/nmb_data/h5/5s_last/model_speech_vgg.h5')

start = datetime.now()

op = Adadelta(lr=1e-4)
batch_size = 8
model.compile(optimizer=op, loss="sparse_categorical_crossentropy",
              metrics=['acc'])

es = EarlyStopping(monitor='val_loss', patience=20,
                   restore_best_weights=True, verbose=1)
lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, verbose=1)
path = 'C:/nmb/nmb_data/h5/5s_last/speech_vgg_adadelta.h5'
mc = ModelCheckpoint(path, monitor='val_loss', verbose=1, save_best_only=True)
tb = TensorBoard(log_dir='C:/nmb/nmb_data/graph/' + 'speech_vgg_adadelta' + "/",
                 histogram_freq=0, write_graph=True, write_images=True)
# history = model.fit(x_train, y_train, epochs=5000, batch_size=batch_size,
#                     validation_split=0.2, callbacks=[es, lr, mc, tb])

# Evaluate, predict
model.load_weights('C:/nmb/nmb_data/h5/5s_last/speech_vgg_adadelta.h5')
result = model.evaluate(x_test, y_test, batch_size=batch_size)
print("loss : {:.5f}".format(result[0]))
print("acc : {:.5f}".format(result[1]) + '\n')

############################################ PREDICT ####################################
pred = ['C:/nmb/nmb_data/predict/5s_last/F', 'C:/nmb/nmb_data/predict/5s_last/M']
count_f = 0
count_m = 0
def main(_argv):
    # physical_devices = tf.config.experimental.list_physical_devices('GPU')
    # for physical_device in physical_devices:
    #     tf.config.experimental.set_memory_growth(physical_device, True)

    model = YoloV3Tiny(FLAGS.size, training=True, classes=FLAGS.num_classes)
    anchors = yolo_tiny_anchors
    anchor_masks = yolo_tiny_anchor_masks

    # if FLAGS.dataset:
    #     train_dataset = dataset.load_tfrecord_dataset(
    #         FLAGS.dataset, FLAGS.classes, FLAGS.size)
    # else:
    # modified the train dataset to have a fake depth channel
    train_dataset = dataset.load_fake_dataset()
    train_dataset = train_dataset.shuffle(buffer_size=512)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    # if FLAGS.val_dataset:
    #     val_dataset = dataset.load_tfrecord_dataset(
    #         FLAGS.val_dataset, FLAGS.classes, FLAGS.size)
    # else:
    val_dataset = dataset.load_fake_dataset()
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))

    # Configure the model for transfer learning
    # if FLAGS.transfer == 'none':
    #     pass  # Nothing to do
    # elif FLAGS.transfer in ['darknet', 'no_output']:
    #     # Darknet transfer is a special case that works
    #     # with incompatible number of classes
    #     model_pretrained = YoloV3Tiny(
    #         FLAGS.size, training=True,
    #         classes=FLAGS.weights_num_classes or FLAGS.num_classes)
    #     model_pretrained.load_weights(FLAGS.weights)
    #     if FLAGS.transfer == 'darknet':
    #         model.get_layer('yolo_darknet').set_weights(
    #             model_pretrained.get_layer('yolo_darknet').get_weights())
    #         freeze_all(model.get_layer('yolo_darknet'))
    #     elif FLAGS.transfer == 'no_output':
    #         for l in model.layers:
    #             if not l.name.startswith('yolo_output'):
    #                 l.set_weights(model_pretrained.get_layer(
    #                     l.name).get_weights())
    #                 freeze_all(l)
    # else:
    #     # All other transfer require matching classes
    #     model.load_weights(FLAGS.weights)
    #     if FLAGS.transfer == 'fine_tune':
    #         # freeze darknet and fine tune other layers
    #         darknet = model.get_layer('yolo_darknet')
    #         freeze_all(darknet)
    #     elif FLAGS.transfer == 'frozen':
    #         # freeze everything
    #         freeze_all(model)

    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    loss = [
        YoloLoss(anchors[mask], classes=FLAGS.num_classes)
        for mask in anchor_masks
    ]

    # if FLAGS.mode == 'eager_tf':
    #     # Eager mode is great for debugging
    #     # Non eager graph mode is recommended for real training
    #     avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
    #     avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)
    #
    #     for epoch in range(1, FLAGS.epochs + 1):
    #         for batch, (images, labels) in enumerate(train_dataset):
    #             with tf.GradientTape() as tape:
    #                 outputs = model(images, training=True)
    #                 regularization_loss = tf.reduce_sum(model.losses)
    #                 pred_loss = []
    #                 for output, label, loss_fn in zip(outputs, labels, loss):
    #                     pred_loss.append(loss_fn(label, output))
    #                 total_loss = tf.reduce_sum(pred_loss) + regularization_loss
    #
    #             grads = tape.gradient(total_loss, model.trainable_variables)
    #             optimizer.apply_gradients(
    #                 zip(grads, model.trainable_variables))
    #
    #             logging.info("{}_train_{}, {}, {}".format(
    #                 epoch, batch, total_loss.numpy(),
    #                 list(map(lambda x: np.sum(x.numpy()), pred_loss))))
    #             avg_loss.update_state(total_loss)
    #
    #         for batch, (images, labels) in enumerate(val_dataset):
    #             outputs = model(images)
    #             regularization_loss = tf.reduce_sum(model.losses)
    #             pred_loss = []
    #             for output, label, loss_fn in zip(outputs, labels, loss):
    #                 pred_loss.append(loss_fn(label, output))
    #             total_loss = tf.reduce_sum(pred_loss) + regularization_loss
    #
    #             logging.info("{}_val_{}, {}, {}".format(
    #                 epoch, batch, total_loss.numpy(),
    #                 list(map(lambda x: np.sum(x.numpy()), pred_loss))))
    #             avg_val_loss.update_state(total_loss)
    #
    #         logging.info("{}, train: {}, val: {}".format(
    #             epoch,
    #             avg_loss.result().numpy(),
    #             avg_val_loss.result().numpy()))
    #
    #         avg_loss.reset_states()
    #         avg_val_loss.reset_states()
    #         model.save_weights(
    #             'checkpoints/yolov3_train_{}.tf'.format(epoch))
    # else:
    model.compile(optimizer=optimizer, loss=loss,
                  run_eagerly=(FLAGS.mode == 'eager_fit'))

    callbacks = [
        ReduceLROnPlateau(verbose=1),
        EarlyStopping(patience=3, verbose=1),
        ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                        verbose=1, save_weights_only=True),
        TensorBoard(log_dir='logs')
    ]

    history = model.fit(train_dataset,
                        epochs=FLAGS.epochs,
                        callbacks=callbacks,
                        validation_data=val_dataset)
class TimeHistory(tf.keras.callbacks.Callback):
    """Record the wall-clock duration of every epoch."""

    def on_train_begin(self, logs={}):
        self.times = []

    def on_epoch_begin(self, epoch, logs={}):
        self.epoch_time_start = time.time()

    def on_epoch_end(self, epoch, logs={}):
        self.times.append(time.time() - self.epoch_time_start)


# Callbacks
terminateOnNan = TerminateOnNaN()
earlyStopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=20,
                              verbose=1, mode='min', baseline=None,
                              restore_best_weights=True)
reduceOnPlateau = ReduceLROnPlateau(monitor='val_loss', factor=0.6,
                                    patience=6, min_lr=0.0001)
modelCheckpoint = ModelCheckpoint(f'checkpoints/{model_name}.h5',
                                  monitor='loss', verbose=0,
                                  save_best_only=True, mode='min')
tensorboard = TensorBoard(log_dir=f'logs/{model_name}')
time_callback = TimeHistory()

# Train model
history = model.fit(
    trainX, trainY,
    epochs=EPOCHS,
    validation_data=(testX, testY),
    batch_size=BATCH_SIZE,
    shuffle=True,
    callbacks=[
        terminateOnNan,
        earlyStopping,
        reduceOnPlateau,
        modelCheckpoint,
        tensorboard,
        time_callback,  # assumed final entry; the source was truncated here
    ],
)
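# After fit() returns, the custom TimeHistory callback above holds one
# wall-clock duration per completed epoch; a small readout sketch:
for epoch_idx, seconds in enumerate(time_callback.times, start=1):
    print(f"epoch {epoch_idx}: {seconds:.2f}s")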
    # accuracy. Scale the learning rate `lr = 1.0` ---> `lr = 1.0 * hvd.size()`
    # during the first three epochs.
    # See https://arxiv.org/abs/1706.02677 for details.
    hvd.callbacks.LearningRateWarmupCallback(warmup_epochs=3, verbose=1),
]

# ### Learning
#
# We'll use TensorBoard to visualize our progress during training.

# Horovod: per-rank log file name
logfile = "dvc-cnn-simple-{}-".format(hvd.rank())
logfile = logfile + datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
logdir = os.path.join(os.getcwd(), "logs", logfile)
print('Rank:', hvd.rank(), 'TensorBoard log directory:', logdir)

os.makedirs(logdir)
callbacks.append(TensorBoard(log_dir=logdir))

# Horovod: reduce epochs
epochs = 20 // hvd.size()

# Horovod: write logs on worker 0.
verbose = 2 if hvd.rank() == 0 else 0

history = model.fit(train_dataset,
                    epochs=epochs,
                    validation_data=validation_dataset,
                    callbacks=callbacks,
                    verbose=verbose)

# Horovod: save the model on worker 0 only.
if hvd.rank() == 0:
    fname = "dvc-cnn-simple-hvd.h5"
    print('Saving model to', fname)
    model.save(fname)  # assumed save step; the source was truncated here
earlystop = EarlyStopping(monitor="val_loss", patience=20, verbose=1)
earlystop.set_model(model)
earlystop.on_train_begin()

modelcheckpoint = ModelCheckpoint(filepath="weights/", monitor="val_loss",
                                  verbose=1, save_best_only=True)
modelcheckpoint.set_model(model)
modelcheckpoint.on_train_begin()

reduce_lr = ReduceLROnPlateau(monitor="val_loss", patience=10, verbose=1)
reduce_lr.set_model(model)
reduce_lr.on_train_begin()

tensorboard = TensorBoard(log_dir="logs/")
tensorboard.set_model(model)
tensorboard.on_train_begin()

epochs = 3
train_logs_dict = {}
test_logs_dict = {}

for epoch in range(epochs):
    training_acc, testing_acc, training_loss, testing_loss = [], [], [], []
    print("\nStart of epoch %d" % (epoch + 1,))

    # Notify every callback that a new epoch is starting.
    modelcheckpoint.on_epoch_begin(epoch)
    earlystop.on_epoch_begin(epoch)
    reduce_lr.on_epoch_begin(epoch)
    tensorboard.on_epoch_begin(epoch)

    # Iterate over the batches of the dataset.
    for x_batch_train, y_batch_train in get_batch(batch_size, x_train,
                                                  y_train):  # y_train assumed; source truncated here
        ...  # per-batch train/eval step (truncated in the source)
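# Driving callbacks by hand means the loop itself must report metrics at the
# epoch boundary. A sketch of the epoch-end bookkeeping this (truncated) loop
# needs, to be called inside `for epoch in ...` after the batch loop; the
# metric names and the np.mean aggregation are assumptions:
import numpy as np

def end_epoch(epoch, training_loss, training_acc, testing_loss, testing_acc):
    logs = {
        "loss": float(np.mean(training_loss)),
        "accuracy": float(np.mean(training_acc)),
        "val_loss": float(np.mean(testing_loss)),
        "val_accuracy": float(np.mean(testing_acc)),
    }
    # Each callback reads the logs dict: ModelCheckpoint and EarlyStopping
    # watch val_loss, ReduceLROnPlateau adjusts the learning rate, and
    # TensorBoard writes the scalars out for visualization.
    modelcheckpoint.on_epoch_end(epoch, logs)
    earlystop.on_epoch_end(epoch, logs)
    reduce_lr.on_epoch_end(epoch, logs)
    tensorboard.on_epoch_end(epoch, logs)
    return logs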