def fit(self, **fit_params):
    # Ensure the pruning-step callback is always present before delegating to Keras fit().
    if 'callbacks' in fit_params:
        fit_params['callbacks'].append(sparsity.UpdatePruningStep())
    else:
        fit_params['callbacks'] = [sparsity.UpdatePruningStep()]
    return self._model.fit(**fit_params)
def train_model(m, name, earlyStop=True):
    #checkpoint = ModelCheckpoint("logs/checkpoints/" + name + ".h5", monitor='val_accuracy', verbose=0, save_best_only=True, save_weights_only=False, mode='auto', period=1)
    early = EarlyStopping(monitor='accuracy', min_delta=0, patience=20, verbose=1, mode='auto')
    pruning = sparsity.UpdatePruningStep()
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir="logs/fit/" + RUN_ID + "/" + name,
                                                          histogram_freq=1)
    if earlyStop:
        callbacks = [tensorboard_callback, pruning, early]
    else:
        callbacks = [tensorboard_callback, pruning]
    hst = m.fit(x=train['features'],
                y=tf.keras.utils.to_categorical(train['labels'], 10),
                validation_data=(test['features'],
                                 tf.keras.utils.to_categorical(test['labels'], 10)),
                epochs=EPOCHS,
                steps_per_epoch=STEPS_EPOCH,
                callbacks=callbacks)
    # Dump the training history as JSON (single quotes replaced so it can be parsed back).
    with open(HIST_DIR + name + '-' + RUN_DATE + '.json', 'w') as f:
        f.write(str(hst.history).replace('\'', '"'))
    return hst
def train_pruned_model(model, dataset, vocab):
    pruned_model = model(vocab_size=len(vocab),
                         embedding_dim=FLAGS.embedding_dim,
                         rnn_units=FLAGS.RNN_units,
                         batch_size=FLAGS.batch_size)
    logdir = tempfile.mkdtemp()
    callbacks = [
        sparsity.UpdatePruningStep(),
        sparsity.PruningSummaries(log_dir=logdir, profile_batch=0)
    ]
    pruned_model.compile(optimizer='adam', loss=loss)
    pruned_model.fit(dataset, epochs=FLAGS.num_epochs, callbacks=callbacks)

    # Save the pruned model for size comparison later
    _, checkpoint_file = tempfile.mkstemp(str(FLAGS.final_sparsity) + '_pruned.h5', dir='models/')
    print('Saving pruned model to: ', checkpoint_file)
    tf.keras.models.save_model(pruned_model, checkpoint_file, include_optimizer=False)

    # Strip the pruning wrappers from the pruned model, as they are only needed for training
    final_pruned_model = sparsity.strip_pruning(pruned_model)
    _, pruned_keras_file = tempfile.mkstemp('_final_pruned.h5', dir='models/')
    print('Saving stripped pruned model to: ', pruned_keras_file)
    tf.keras.models.save_model(final_pruned_model, pruned_keras_file, include_optimizer=False)
    return pruned_model, final_pruned_model
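# A minimal follow-up sketch (not part of the original code) for the "size comparison later"
# mentioned above: compare the gzip-compressed sizes of the saved .h5 files. Sparse (pruned)
# weights compress far better than dense ones. The file path argument is whichever path
# tempfile.mkstemp() returned in train_pruned_model().
import os
import tempfile
import zipfile

def gzipped_size(keras_file):
    # Zip the .h5 file and report the compressed size in bytes.
    _, zipped = tempfile.mkstemp('.zip')
    with zipfile.ZipFile(zipped, 'w', compression=zipfile.ZIP_DEFLATED) as f:
        f.write(keras_file)
    return os.path.getsize(zipped)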
def sparsePrune(logger, model, X_train, Y_train, X_test, Y_test, num_train_samples,
                batch_size, epochs, initSparse, endSparse):
    end_step = np.ceil(1.0 * num_train_samples / batch_size).astype(np.int32) * epochs
    # TODO determine how to limit this pruning to retain 90% of the network weights / size
    new_pruning_params = {
        'pruning_schedule': sparsity.PolynomialDecay(initial_sparsity=initSparse,
                                                     final_sparsity=endSparse,
                                                     begin_step=0,
                                                     end_step=end_step,
                                                     frequency=100)
    }
    new_model = sparsity.prune_low_magnitude(model, **new_pruning_params)
    new_model.compile(loss='categorical_crossentropy',
                      optimizer='adadelta',
                      metrics=['accuracy'])
    callbacks = [
        sparsity.UpdatePruningStep(),
        # PruningSummaries needs a real log directory; passing None fails.
        sparsity.PruningSummaries(log_dir='./logs', profile_batch=0)
    ]
    new_model.fit(X_train, Y_train,
                  batch_size=batch_size,
                  epochs=epochs,
                  verbose=1,
                  callbacks=callbacks,
                  validation_data=(X_test, Y_test))
    score = new_model.evaluate(X_test, Y_test, verbose=0)
    logger.info('Sparsely Pruned Network Experiment - Results')
    logger.info('Test loss: %s', score[0])
    logger.info('Test accuracy: %s', score[1])
    return new_model
def prun_model(params):
    import tensorflow_model_optimization as tfmot
    from tensorflow_model_optimization.sparsity import keras as sparsity

    save_path = os.path.join(params.model_path, "pruned_%s.h5" % params.net)
    pretrain_path = os.path.join(params.model_path, "%s.h5" % params.net)
    models_a = tf.keras.models.load_model(pretrain_path,
                                          custom_objects={
                                              "tf": tf,
                                              "white_norm": white_norm,
                                              "Squeeze": Squeeze,
                                              "cls_ohem": cls_ohem,
                                              "bbox_ohem": bbox_ohem,
                                              "landmark_ohem": landmark_ohem,
                                              "accuracy": accuracy
                                          })

    pruning_schedule = tfmot.sparsity.keras.PolynomialDecay(initial_sparsity=0.5,
                                                            final_sparsity=0.9,
                                                            begin_step=0,
                                                            end_step=5000 * 4)
    # Must be a tf.keras.Model, otherwise this returns None (see the source code).
    model_for_pruning = tfmot.sparsity.keras.prune_low_magnitude(
        models_a, pruning_schedule=pruning_schedule)

    input_size, loss_weight, loss_func = gen_traning_params(params.net)
    model_for_pruning.compile(optimizer=Adam(lr=params.lr),
                              loss=loss_func,
                              loss_weights=loss_weight,
                              metrics={"prune_low_magnitude_class": accuracy})

    train_gen = DataGenetator(trainset, input_size=input_size, batch_size=batch_size)
    validation_gen = DataGenetator(testset, input_size=input_size, batch_size=batch_size,
                                   is_training=False)

    callbacks = [
        ModelCheckpoint(save_path,
                        monitor='val_prune_low_magnitude_class_accuracy',
                        verbose=1,
                        save_best_only=True,
                        mode='max'),
        sparsity.UpdatePruningStep(),
        sparsity.PruningSummaries(log_dir="./logs", profile_batch=0)
    ]

    history = model_for_pruning.fit_generator(
        train_gen,
        workers=params.worker,
        use_multiprocessing=(params.worker > 1),
        steps_per_epoch=len(trainset) / batch_size,
        epochs=80,
        callbacks=callbacks,
        validation_data=validation_gen,
        validation_steps=len(testset) / batch_size * 3)
def train():
    (x_train, y_train), (x_test, y_test) = input_fn()
    num_train_samples = x_train.shape[0]
    end_step = np.ceil(1.0 * num_train_samples / BATCH_SIZE).astype(np.int32) * EPOCHS
    print('End step: ' + str(end_step))
    print('Train input shape: ', x_train.shape)

    pruning_params = {
        'pruning_schedule': sparsity.PolynomialDecay(initial_sparsity=0.20,
                                                     final_sparsity=0.95,
                                                     begin_step=500,
                                                     end_step=end_step,
                                                     frequency=100)
    }
    pruned_model = get_model(input_shape=x_train.shape[1:], pruning_params=pruning_params)
    pruned_model.compile(loss=tf.keras.losses.sparse_categorical_crossentropy,
                         optimizer='adam',
                         metrics=['accuracy'])
    pruned_model.summary()

    initial_ws = save_weights(pruned_model)
    logdir = 'logs'

    # Add a pruning step callback to peg the pruning step to the optimizer's
    # step. Also add a callback to add pruning summaries to TensorBoard.
    callbacks = [
        sparsity.UpdatePruningStep(),
        sparsity.PruningSummaries(log_dir=logdir, profile_batch=0),
        ResetCallback(initial_ws, 2)
    ]
    pruned_model.fit(x_train, y_train,
                     batch_size=BATCH_SIZE,
                     epochs=EPOCHS,
                     verbose=1,
                     callbacks=callbacks,
                     validation_data=(x_test, y_test))

    score = pruned_model.evaluate(x_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
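# save_weights() and ResetCallback are referenced above but not shown. A hypothetical sketch of
# what they might look like, assuming ResetCallback restores the saved initial weights every
# `reset_epochs` epochs (lottery-ticket style rewinding). Names and behavior here are
# assumptions, not the original project's implementation.
import tensorflow as tf

def save_weights(model):
    # Snapshot the current weight tensors as numpy arrays (assumed behavior).
    return [w.numpy() for w in model.weights]

class ResetCallback(tf.keras.callbacks.Callback):
    def __init__(self, initial_weights, reset_epochs):
        super().__init__()
        self.initial_weights = initial_weights
        self.reset_epochs = reset_epochs

    def on_epoch_end(self, epoch, logs=None):
        # Restore the saved initial values every `reset_epochs` epochs (assumption).
        if (epoch + 1) % self.reset_epochs == 0:
            for var, init in zip(self.model.weights, self.initial_weights):
                var.assign(init)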
def get_callbacks(self):
    """
    Set up the requested callbacks to be passed to the model.

    Returns:
        callbacks (list): A list of callbacks.

    Notes:
        MyStopping is never used.
    """
    callbacks = []
    if self["prune"]:
        callbacks.append(sparsity.UpdatePruningStep())
    if self["tensorboard"]:
        raise ValueError("Tensorboard not implemented yet")
        callbacks.append(keras.callbacks.TensorBoard(
            log_dir=os.path.join(self.log_dir, "tensorboard"), histogram_freq=20))
    if self["stopping"]:
        if True:
            callbacks.append(keras.callbacks.EarlyStopping(monitor='val_loss',
                                                           min_delta=self["stopping_delta"],
                                                           patience=self["stopping_patience"],
                                                           verbose=1,
                                                           restore_best_weights=True))
        else:
            # Dead branch: MyStopping is never reached (see docstring note).
            callbacks.append(keras.callbacks.MyStopping(monitor='val_loss', target=5,
                                                        patience=self["stopping_patience"],
                                                        verbose=1, restore_best_weights=True))
    return callbacks
def prune_Conv1D(final_sparsity, initial_sparsity=0.0, begin_step=0, frequency=100, version=""):
    # Set up some params
    nb_epoch = 50      # number of epochs to train on
    batch_size = 1024  # training batch size
    num_train_samples = X_train.shape[0]
    end_step = np.ceil(1.0 * num_train_samples / batch_size).astype(np.int32) * nb_epoch
    print("End step: ", end_step)

    pruning_params = {
        'pruning_schedule': sparsity.PolynomialDecay(initial_sparsity=initial_sparsity,
                                                     final_sparsity=final_sparsity,
                                                     begin_step=begin_step,
                                                     end_step=end_step,
                                                     frequency=frequency)
    }

    l = tf.keras.layers
    dr = 0.5  # dropout rate
    pruned_model = tf.keras.Sequential([
        sparsity.prune_low_magnitude(
            l.Conv1D(128, 3, padding='valid', activation="relu", name="conv1",
                     kernel_initializer='glorot_uniform', input_shape=in_shape),
            **pruning_params),
        sparsity.prune_low_magnitude(
            l.Conv1D(128, 3, padding='valid', activation="relu", name="conv2",
                     kernel_initializer='glorot_uniform'), **pruning_params),
        l.MaxPool1D(2),
        sparsity.prune_low_magnitude(
            l.Conv1D(64, 3, padding='valid', activation="relu", name="conv3",
                     kernel_initializer='glorot_uniform'), **pruning_params),
        sparsity.prune_low_magnitude(
            l.Conv1D(64, 3, padding='valid', activation="relu", name="conv4",
                     kernel_initializer='glorot_uniform'), **pruning_params),
        l.Dropout(dr),
        sparsity.prune_low_magnitude(
            l.Conv1D(32, 3, padding='valid', activation="relu", name="conv5",
                     kernel_initializer='glorot_uniform'), **pruning_params),
        sparsity.prune_low_magnitude(
            l.Conv1D(32, 3, padding='valid', activation="relu", name="conv6",
                     kernel_initializer='glorot_uniform'), **pruning_params),
        l.Dropout(dr),
        l.MaxPool1D(2),
        l.Flatten(),
        sparsity.prune_low_magnitude(
            l.Dense(128, activation='relu', kernel_initializer='he_normal', name="dense1"),
            **pruning_params),
        sparsity.prune_low_magnitude(
            l.Dense(len(classes), kernel_initializer='he_normal', name="dense2"),
            **pruning_params),
        l.Activation('softmax')
    ])

    pruned_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=["accuracy"])
    pruned_model.summary()

    callbacks = [
        sparsity.UpdatePruningStep(),
        sparsity.PruningSummaries(log_dir=logdir, profile_batch=0)
    ]
    history = pruned_model.fit(X_train, Y_train,
                               batch_size=batch_size,
                               epochs=nb_epoch,
                               verbose=1,
                               validation_data=(X_val, Y_val),
                               callbacks=callbacks)

    score = pruned_model.evaluate(X_test, Y_test, verbose=0)
    print("Test loss: ", score)

    # Save the model
    pruned_model = sparsity.strip_pruning(pruned_model)
    pruned_model.summary()
    # Save the model architecture
    print_model_to_json(pruned_model,
                        './model/Conv1D-{}.json'.format(str(final_sparsity) + version))
    # Save the weights
    pruned_model.save_weights('./model/Conv1D-{}.h5'.format(str(final_sparsity) + version))
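# A small sanity-check sketch (not from the original code): after strip_pruning, count the
# exactly-zero weights per layer to confirm the requested final_sparsity was actually reached.
# Assumes `model` is the stripped Keras model returned by strip_pruning above.
import numpy as np

def report_sparsity(model):
    for layer in model.layers:
        for weight in layer.get_weights():
            if weight.size:
                zeros = np.sum(weight == 0)
                print('{}: {:.1%} zero weights'.format(layer.name, zeros / weight.size))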
def callbacks_init():
    callbacks = []
    callbacks.append(sparsity.UpdatePruningStep())
    callbacks.append(sparsity.PruningSummaries(log_dir=config.tb_prune_dir, profile_batch=0))
    return callbacks
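# Hedged usage sketch (not part of the original code): callbacks_init() only has an effect on a
# model wrapped with prune_low_magnitude; UpdatePruningStep then advances the pruning schedule
# on every batch. The toy model and random data below are placeholders for illustration, and the
# sketch assumes the `config` module used by callbacks_init() is importable.
import numpy as np
import tensorflow as tf

toy = tf.keras.Sequential([tf.keras.layers.Dense(10, input_shape=(4,))])
pruned_toy = sparsity.prune_low_magnitude(toy)  # default ConstantSparsity schedule
pruned_toy.compile(optimizer='adam', loss='mse')
pruned_toy.fit(np.random.rand(32, 4), np.random.rand(32, 10),
               epochs=1, callbacks=callbacks_init())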
def layer_pruned_model():
    # Build a pruned model layer by layer
    epochs = 12
    (x_train, y_train), (x_test, y_test) = prepare_data()
    num_train_samples = x_train.shape[0]
    end_step = np.ceil(1.0 * num_train_samples / batch_size).astype(np.int32) * epochs
    print('End step: ' + str(end_step))

    pruning_params = {
        'pruning_schedule': sparsity.PolynomialDecay(initial_sparsity=0.50,
                                                     final_sparsity=0.90,
                                                     begin_step=2000,
                                                     end_step=end_step,
                                                     frequency=100)
    }

    # build the model
    l = tf.keras.layers
    pruned_model = tf.keras.Sequential([
        sparsity.prune_low_magnitude(l.Conv2D(32, 5, padding='same', activation='relu'),
                                     input_shape=input_shape,
                                     **pruning_params),
        l.MaxPooling2D((2, 2), (2, 2), padding='same'),
        l.BatchNormalization(),
        sparsity.prune_low_magnitude(l.Conv2D(64, 5, padding='same', activation='relu'),
                                     **pruning_params),
        l.MaxPooling2D((2, 2), (2, 2), padding='same'),
        l.Flatten(),
        sparsity.prune_low_magnitude(l.Dense(1024, activation='relu'), **pruning_params),
        l.Dropout(0.4),
        sparsity.prune_low_magnitude(l.Dense(num_classes, activation='softmax'),
                                     **pruning_params)
    ])
    pruned_model.summary()

    logdir = tempfile.mkdtemp()
    print('Writing training logs to ' + logdir)
    # %tensorboard --logdir={logdir}

    # train the model
    pruned_model.compile(loss=tf.keras.losses.categorical_crossentropy,
                         optimizer='adam',
                         metrics=['accuracy'])
    callbacks = [
        sparsity.UpdatePruningStep(),
        sparsity.PruningSummaries(log_dir=logdir, profile_batch=0)
    ]
    pruned_model.fit(x_train, y_train,
                     batch_size=batch_size,
                     epochs=10,
                     verbose=1,
                     callbacks=callbacks,
                     validation_data=(x_test, y_test))
    score = pruned_model.evaluate(x_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])

    # Save and restore
    checkpoint_file = './pruned_checkpoint_file.h5'
    # _, checkpoint_file = tempfile.mkstemp('.h5')
    print('Saving pruned model to: ', checkpoint_file)
    # save_model() sets include_optimizer to True by default. Spelling it out here
    # to highlight.
    tf.keras.models.save_model(pruned_model, checkpoint_file, include_optimizer=True)

    with sparsity.prune_scope():
        restored_model = tf.keras.models.load_model(checkpoint_file)
    restored_model.fit(x_train, y_train,
                       batch_size=batch_size,
                       epochs=2,
                       verbose=1,
                       callbacks=callbacks,
                       validation_data=(x_test, y_test))

    start_test = time.time()
    score = restored_model.evaluate(x_test, y_test, verbose=0)
    end_test = time.time()
    print('Test latency:', end_test - start_test)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])

    final_model = sparsity.strip_pruning(pruned_model)
    final_model.summary()

    layer_pruned_file = './layer_pruned_file.h5'
    # _, layer_pruned_file = tempfile.mkstemp('.h5')
    print('Saving pruned model to: ', layer_pruned_file)
    tf.keras.models.save_model(final_model, layer_pruned_file, include_optimizer=False)
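# Optional follow-up sketch (not part of the original function): converting the stripped model to
# TensorFlow Lite is the usual way to realize the size benefit of pruning, since the sparse
# weights compress well. The path below mirrors layer_pruned_file used above.
import tensorflow as tf

stripped = tf.keras.models.load_model('./layer_pruned_file.h5')
converter = tf.lite.TFLiteConverter.from_keras_model(stripped)
tflite_model = converter.convert()
with open('./layer_pruned_file.tflite', 'wb') as f:
    f.write(tflite_model)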
def train(cfg):
    epochs = cfg['epochs']
    save_dir = cfg['save_dir']
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
    shape = (int(cfg['height']), int(cfg['width']), 3)
    n_class = int(cfg['class_number'])
    batch_size = int(cfg['batch_size'])

    if cfg['model'] == 'mymodel':
        from model.my_model import MyModel
        model = MyModel(shape, n_class).build()
    if cfg['model'] == 'v2':
        from model.mobilenet_v2 import MyModel
        model = MyModel(shape, n_class).buildRaw()

    train_generator, validation_generator, count1, count2 = generate(batch_size, shape[:2],
                                                                     cfg['train_dir'],
                                                                     cfg['eval_dir'])
    print(count1, count2)

    earlystop = EarlyStopping(monitor='val_acc', patience=4, verbose=0, mode='auto')
    checkpoint = ModelCheckpoint(filepath=os.path.join("save", 'prune_e_{epoch:02d}_{val_loss:.3f}_{val_acc:.3f}.h5'),
                                 monitor='val_acc',
                                 save_best_only=False,
                                 save_weights_only=False)
    reduce_lr = ReduceLROnPlateau(monitor='val_acc', factor=0.1, patience=2, verbose=1, min_lr=1e-7)

    model_path = r'./save/v2'
    # x_train, y_train = train_generator.next()
    # num_train_samples = batch_size
    # x_test, y_test = validation_generator.next()
    loaded_model = tf.keras.models.load_model(os.path.join(model_path, 'e_06_0.20_1.00.h5'))
    score = loaded_model.evaluate_generator(validation_generator, count2 // batch_size)
    print('original Test loss:', score[0])
    print('original Test accuracy:', score[1])

    end_step = np.ceil(1.0 * count1 / batch_size).astype(np.int32) * epochs
    print(end_step)
    new_pruning_params = {
        'pruning_schedule': sparsity.PolynomialDecay(initial_sparsity=0.50,
                                                     final_sparsity=0.90,
                                                     begin_step=0,
                                                     end_step=end_step,
                                                     frequency=100)
    }
    new_pruned_model = sparsity.prune_low_magnitude(loaded_model, **new_pruning_params)
    # new_pruned_model.summary()

    opt = Adam(lr=float(0.0001))
    new_pruned_model.compile(loss=tf.keras.losses.categorical_crossentropy,
                             optimizer=opt,
                             metrics=['acc'])

    # Now start training and pruning the model.
    # Add a pruning step callback to peg the pruning step to the optimizer's
    # step. Also add a callback to add pruning summaries to TensorBoard.
    logdir = "./save/log"
    callbacks = [earlystop, checkpoint, reduce_lr,
                 sparsity.UpdatePruningStep(),
                 sparsity.PruningSummaries(log_dir=logdir, profile_batch=0)]

    # new_pruned_model.fit(x_train, y_train,
    #                      batch_size=batch_size,
    #                      epochs=epochs,
    #                      verbose=1,
    #                      callbacks=callbacks,
    #                      validation_data=(x_test, y_test))
    new_pruned_model.fit_generator(train_generator,
                                   validation_data=validation_generator,
                                   steps_per_epoch=100,  # count1 // batch_size,
                                   validation_steps=count2 // batch_size,
                                   epochs=epochs,
                                   callbacks=callbacks)

    score = new_pruned_model.evaluate_generator(validation_generator, count2 // batch_size)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])

    final_model = sparsity.strip_pruning(new_pruned_model)
    new_pruned_keras_file = "save/pruned_model.h5"
    tf.keras.models.save_model(final_model, new_pruned_keras_file, include_optimizer=False)
def train(self):
    bert_config = modeling.BertConfig.from_json_file(args.config_name)
    if args.max_seq_len > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (args.max_seq_len, bert_config.max_position_embeddings))

    tf.io.gfile.makedirs(args.output_dir)
    label_list = self._processor.get_labels()

    train_examples = self._processor.get_train_examples(args.data_dir)
    num_train_steps = int(len(train_examples) / args.batch_size * args.num_train_epochs)

    estimator, model = create_estimator(steps=num_train_steps,
                                        warmup_steps=num_train_steps * 0.1)

    train_file = os.path.join(args.output_dir, "train.tf_record")
    self.file_based_convert_examples_to_features(train_examples, label_list,
                                                 args.max_seq_len, self._tokenizer,
                                                 train_file)
    # tf.logging.info("***** Running training *****")
    # tf.logging.info("  Num examples = %d", len(train_examples))
    # tf.logging.info("  Batch size = %d", args.batch_size)
    # tf.logging.info("  Num steps = %d", num_train_steps)
    train_input_fn = self.file_based_input_fn_builder(input_file=train_file,
                                                      seq_length=args.max_seq_len,
                                                      is_training=True,
                                                      batch_size=args.batch_size,
                                                      drop_remainder=True)
    # early_stopping = tf.contrib.estimator.stop_if_no_decrease_hook(
    #     estimator,
    #     metric_name='loss',
    #     max_steps_without_decrease=10,
    #     min_steps=num_train_steps)
    # estimator.train(input_fn=train_input_fn, hooks=[early_stopping])

    if args.prune_enabled:
        class WarmRestart(tf.keras.callbacks.Callback):
            def on_epoch_end(self, epoch, logs={}):
                K.set_value(model.optimizer.iter_updates, 0)

        callbacks = [
            sparsity.UpdatePruningStep(),
            sparsity.PruningSummaries(log_dir=args.prune_logdir, profile_batch=0),
            WarmRestart()
        ]
        # smart keras
        model.fit(train_input_fn(None),
                  epochs=args.num_train_epochs,
                  steps_per_epoch=int(len(train_examples) / args.batch_size),
                  verbose=1,
                  callbacks=callbacks)
    else:
        hooks = []
        if args.enable_early_stopping:
            import early_stopping
            early_stop = early_stopping.stop_if_no_decrease_hook(estimator, "loss", 2000)
            hooks.append(early_stop)
        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps, hooks=hooks)

    feature_columns = [tf.feature_column.numeric_column(x)
                       for x in ['input_ids', 'input_mask', 'segment_ids']]
    serving_input_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(
        tf.feature_column.make_parse_example_spec(feature_columns))
    estimator.export_saved_model(export_dir_base=args.output_dir,
                                 serving_input_receiver_fn=serving_input_fn,
                                 experimental_mode=tf.estimator.ModeKeys.EVAL)

    model.reset_metrics()
    model.save(args.keras_model_path)
new_pruning_params = {
    'pruning_schedule': tfmot.PolynomialDecay(initial_sparsity=0.50,
                                              final_sparsity=0.90,
                                              begin_step=0,
                                              end_step=end_step,
                                              frequency=100)
}
new_pruned_model = tfmot.prune_low_magnitude(model, **new_pruning_params)
new_pruned_model.summary()

opt = SGD(lr=0.001)
new_pruned_model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
callbacks = [tfmot.UpdatePruningStep()]

start_Time = time.time()
# Training model
training = new_pruned_model.fit(x_train, y_train,
                                batch_size=batch_size,
                                epochs=epochs,
                                validation_split=0.2,
                                shuffle=True,
                                callbacks=callbacks,
                                verbose=1)
training_Time = time.time()
scores = new_pruned_model.evaluate(x_test, y_test, verbose=2)
end_Time = time.time()
def run(network_type, epochs, pruning_epochs, tensorboard_log_dir, model_dir_path, _log, _run):
    if network_type == 'mlp' or network_type == 'mlp_regression':
        # don't use gpu for mlps
        os.environ['CUDA_VISIBLE_DEVICES'] = ''
    elif network_type == 'cnn':
        os.environ['CUDA_VISIBLE_DEVICES'] = '3'
    else:
        # use gpu for cnn_vggs
        os.environ['CUDA_VISIBLE_DEVICES'] = '3'
    assert network_type in ('mlp', 'mlp_regression', 'cnn', 'cnn_vgg')

    _log.info('Emptying model directory...')
    if model_dir_path.exists():
        shutil.rmtree(model_dir_path)
    Path(model_dir_path).mkdir(parents=True)

    _log.info('Loading data...')
    (X_train, y_train), (X_test, y_test) = load_data()

    metrics = {}
    unpruned_model_path, pruned_model_path = get_two_model_paths()

    unpruned_model = create_model()
    _log.info('Training unpruned model...')
    metrics['unpruned'] = train_model(unpruned_model, X_train, y_train, X_test, y_test,
                                      unpruned_model_path, epochs=epochs, is_pruning=False)
    _log.info('Unpruned model sparsity: {}'.format(get_sparsity(unpruned_model)))
    save_weights(unpruned_model, unpruned_model_path)

    pruning_params = get_pruning_params(X_train.shape[0])
    pruned_model = sparsity.prune_low_magnitude(unpruned_model, **pruning_params)
    pruning_callbacks = [
        sparsity.UpdatePruningStep(),
        sparsity.PruningSummaries(log_dir=tensorboard_log_dir, profile_batch=0)
    ]
    _log.info('Training pruned model...')
    metrics['pruned'] = train_model(pruned_model, X_train, y_train, X_test, y_test,
                                    pruned_model_path, epochs=pruning_epochs,
                                    is_pruning=True, callbacks=pruning_callbacks)
    _log.info('Pruned model sparsity: {}'.format(get_sparsity(pruned_model)))
    save_weights(pruned_model, pruned_model_path)

    ex.add_source_file(__file__)
    with open(model_dir_path / 'metrics.json', 'w') as f:
        json.dump(metrics, f, cls=NumpyEncoder)
    return metrics
def _main(args): global lr_base, total_epochs lr_base = args.learning_rate total_epochs = args.total_epoch annotation_file = args.annotation_file log_dir = 'logs/000/' classes_path = args.classes_path class_names = get_classes(classes_path) num_classes = len(class_names) if args.tiny_version: anchors_path = 'configs/tiny_yolo_anchors.txt' else: anchors_path = 'configs/yolo_anchors.txt' anchors = get_anchors(anchors_path) print("\nanchors = ", anchors) print("\nnum_classes = ", num_classes) # get freeze level according to CLI option if args.weights_path: freeze_level = 0 else: freeze_level = 1 if args.freeze_level is not None: freeze_level = args.freeze_level print("\n\nFREEZE LEVEL = ", freeze_level) # callbacks for training process logging = TensorBoard(log_dir=log_dir, histogram_freq=0, write_graph=False, write_grads=False, write_images=False, update_freq='batch') checkpoint = ModelCheckpoint( log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5', monitor='val_loss', verbose=1, save_weights_only=False, save_best_only=True, period=5) reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, verbose=1, cooldown=0, min_lr=1e-10) lr_scheduler = LearningRateScheduler(learning_rate_scheduler) early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=30, verbose=1) terminate_on_nan = TerminateOnNaN() callbacks = [ logging, checkpoint, reduce_lr, early_stopping, terminate_on_nan ] # get train&val dataset dataset = get_dataset(annotation_file) if args.val_annotation_file: val_dataset = get_dataset(args.val_annotation_file) num_train = len(dataset) num_val = len(val_dataset) dataset.extend(val_dataset) else: val_split = args.val_split num_val = int(len(dataset) * val_split) num_train = len(dataset) - num_val # prepare model pruning config pruning_end_step = np.ceil(1.0 * num_train / args.batch_size).astype( np.int32) * args.total_epoch if args.model_pruning: pruning_callbacks = [ sparsity.UpdatePruningStep(), sparsity.PruningSummaries(log_dir=log_dir, profile_batch=0) ] callbacks = callbacks + pruning_callbacks # prepare optimizer optimizer = get_optimizer(args.optimizer, args.learning_rate) # get train model model = get_yolo3_train_model(args.model_type, anchors, num_classes, weights_path=args.weights_path, freeze_level=freeze_level, optimizer=optimizer, label_smoothing=args.label_smoothing, model_pruning=args.model_pruning, pruning_end_step=pruning_end_step) # support multi-gpu training if args.gpu_num >= 2: model = multi_gpu_model(model, gpus=args.gpu_num) model.summary() # Train some initial epochs with frozen layers first if needed, to get a stable loss. input_shape = args.model_image_size assert (input_shape[0] % 32 == 0 and input_shape[1] % 32 == 0), 'Multiples of 32 required' batch_size = args.batch_size initial_epoch = 0 epochs = args.init_epoch print("Initial training stage") print( 'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.' 
.format(num_train, num_val, batch_size, input_shape)) model.fit_generator(data_generator_wrapper(dataset[:num_train], batch_size, input_shape, anchors, num_classes), steps_per_epoch=max(1, num_train // batch_size), validation_data=data_generator_wrapper( dataset[num_train:], batch_size, input_shape, anchors, num_classes), validation_steps=max(1, num_val // batch_size), epochs=epochs, initial_epoch=initial_epoch, callbacks=callbacks) # Apply Cosine learning rate decay only after # unfreeze all layers if args.cosine_decay_learning_rate: callbacks.remove(reduce_lr) callbacks.append(lr_scheduler) # Unfreeze the whole network for further training # NOTE: more GPU memory is required after unfreezing the body print("Unfreeze and continue training, to fine-tune.") for i in range(len(model.layers)): model.layers[i].trainable = True model.compile(optimizer=optimizer, loss={ 'yolo_loss': lambda y_true, y_pred: y_pred }) # recompile to apply the change if args.multiscale: # prepare multiscale config input_shape_list = get_multiscale_list(args.model_type, args.tiny_version) interval = args.rescale_interval # Do multi-scale training on different input shape # change every "rescale_interval" epochs for epoch_step in range(epochs + interval, args.total_epoch, interval): # shuffle train/val dataset for cross-validation if args.data_shuffle: np.random.shuffle(dataset) initial_epoch = epochs epochs = epoch_step # rescale input only from 2nd round, to make sure unfreeze stable if initial_epoch != args.init_epoch: input_shape = input_shape_list[random.randint( 0, len(input_shape_list) - 1)] print( 'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.' .format(num_train, num_val, batch_size, input_shape)) model.fit_generator( data_generator_wrapper(dataset[:num_train], batch_size, input_shape, anchors, num_classes), steps_per_epoch=max(1, num_train // batch_size), validation_data=data_generator_wrapper(dataset[num_train:], batch_size, input_shape, anchors, num_classes), validation_steps=max(1, num_val // batch_size), epochs=epochs, initial_epoch=initial_epoch, callbacks=callbacks) else: # Do single-scale training print( 'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.' .format(num_train, num_val, batch_size, input_shape)) model.fit_generator(data_generator_wrapper(dataset[:num_train], batch_size, input_shape, anchors, num_classes), steps_per_epoch=max(1, num_train // batch_size), validation_data=data_generator_wrapper( dataset[num_train:], batch_size, input_shape, anchors, num_classes), validation_steps=max(1, num_val // batch_size), epochs=args.total_epoch, initial_epoch=epochs, callbacks=callbacks) # Finally store model if args.model_pruning: model = sparsity.strip_pruning(model) model.save(log_dir + 'trained_final.h5')
def run(network_type, batch_size, epochs, pruning_epochs, tensorboard_log_dir, model_dir_path,
        _log, _run, num_iters):
    assert network_type in ('mlp', 'cnn')

    _log.info('Emptying model directory...')
    if model_dir_path.exists():
        shutil.rmtree(model_dir_path)
    Path(model_dir_path).mkdir(parents=True)

    _log.info('Loading data...')
    (X_train, y_train), (X_test, y_test) = load_data()

    metrics = {}
    unpruned_model_path, _ = get_two_model_paths()
    pruned_model_paths = get_pruned_model_paths()
    init_model_path = get_init_model_path()

    unpruned_model = create_model()
    save_weights(unpruned_model, init_model_path)
    _log.info('Training unpruned model...')
    metrics['unpruned'] = train_model(unpruned_model, X_train, y_train, X_test, y_test,
                                      unpruned_model_path, epochs=epochs)
    _log.info('Unpruned model sparsity: {}'.format(get_sparsity(unpruned_model)))
    save_weights(unpruned_model, unpruned_model_path)

    begin_step = np.ceil(X_train.shape[0] / batch_size).astype(np.int32) * (pruning_epochs - 1)
    for i in range(num_iters):
        pruned_model_path = pruned_model_paths[i]
        sparsity_ratio = 1.0 - 0.8**(i + 1)
        pruning_params = get_pruning_params(X_train.shape[0],
                                            initial_sparsity=sparsity_ratio / 2.0,
                                            final_sparsity=sparsity_ratio,
                                            begin_step=begin_step)
        pruned_model = sparsity.prune_low_magnitude(unpruned_model, **pruning_params)
        pruning_callbacks = [
            sparsity.UpdatePruningStep(),
            sparsity.PruningSummaries(log_dir=tensorboard_log_dir, profile_batch=0)
        ]
        _log.info('Training pruned model...')
        metrics[f'iter{i+1}'] = train_model(pruned_model, X_train, y_train, X_test, y_test,
                                            pruned_model_path, epochs=pruning_epochs,
                                            callbacks=pruning_callbacks)
        _log.info('Pruned model sparsity: {}'.format(get_sparsity(pruned_model)))
        save_weights(pruned_model, pruned_model_path)

    ex.add_source_file(__file__)
    with open(model_dir_path / 'metrics.json', 'w') as f:
        json.dump(metrics, f, cls=NumpyEncoder)
    return metrics
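# get_pruning_params() is not shown above. A minimal sketch of what it might return, assuming it
# builds the usual PolynomialDecay schedule from the training-set size; the extra keyword
# arguments (batch_size, epochs, frequency) are assumptions, not the project's actual helper.
import numpy as np
from tensorflow_model_optimization.sparsity import keras as sparsity

def get_pruning_params(num_train_samples, initial_sparsity=0.0, final_sparsity=0.8,
                       begin_step=0, batch_size=32, epochs=10, frequency=100):
    end_step = np.ceil(num_train_samples / batch_size).astype(np.int32) * epochs
    return {
        'pruning_schedule': sparsity.PolynomialDecay(initial_sparsity=initial_sparsity,
                                                     final_sparsity=final_sparsity,
                                                     begin_step=begin_step,
                                                     end_step=end_step,
                                                     frequency=frequency)
    }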
                                              dataset_name=args.input_name)
    fit_kwargs = {
        'steps_per_epoch': n_train_steps,
        'epochs': args.num_epochs
    }
    if args.validation_path:
        valid_gen, n_valid_steps = make_generator(args.validation_path, args.batch_size,
                                                  features=features,
                                                  n_vert_max=n_vert_max,
                                                  dataset_name=args.input_name)
        fit_kwargs['validation_data'] = valid_gen
        fit_kwargs['validation_steps'] = n_valid_steps

    callbacks = [sparsity.UpdatePruningStep()]
    prune_model.fit_generator(train_gen, **fit_kwargs, callbacks=callbacks)
else:
    if args.input_type == 'h5':
        from generators.h5 import make_dataset
    elif args.input_type == 'root':
        from generators.uproot_fixed import make_dataset
    elif args.input_type == 'root-sparse':
        from generators.uproot_jagged_keep import make_dataset

    inputs, truth, shuffle = make_dataset(args.train_path[0],
                                          features=features,
                                          n_vert_max=n_vert_max,
                                          dataset_name=args.input_name)
def main(args): annotation_file = args.annotation_file log_dir = os.path.join('logs', '000') classes_path = args.classes_path class_names = get_classes(classes_path) num_classes = len(class_names) anchors = get_anchors(args.anchors_path) num_anchors = len(anchors) # get freeze level according to CLI option if args.weights_path: freeze_level = 0 else: freeze_level = 1 if args.freeze_level is not None: freeze_level = args.freeze_level # callbacks for training process logging = TensorBoard(log_dir=log_dir, histogram_freq=0, write_graph=False, write_grads=False, write_images=False, update_freq='batch') checkpoint = ModelCheckpoint(os.path.join( log_dir, 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'), monitor='val_loss', mode='min', verbose=1, save_weights_only=False, save_best_only=True, period=1) reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, mode='min', patience=10, verbose=1, cooldown=0, min_lr=1e-10) early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=50, verbose=1, mode='min') terminate_on_nan = TerminateOnNaN() callbacks = [ logging, checkpoint, reduce_lr, early_stopping, terminate_on_nan ] # get train&val dataset dataset = get_dataset(annotation_file) if args.val_annotation_file: val_dataset = get_dataset(args.val_annotation_file) num_train = len(dataset) num_val = len(val_dataset) dataset.extend(val_dataset) else: val_split = args.val_split num_val = int(len(dataset) * val_split) num_train = len(dataset) - num_val # assign multiscale interval if args.multiscale: rescale_interval = args.rescale_interval else: rescale_interval = -1 #Doesn't rescale # model input shape check input_shape = args.model_image_size assert (input_shape[0] % 32 == 0 and input_shape[1] % 32 == 0), 'model_image_size should be multiples of 32' # get different model type & train&val data generator if args.model_type.startswith( 'scaled_yolo4_') or args.model_type.startswith('yolo5_'): # Scaled-YOLOv4 & YOLOv5 entrance, use yolo5 submodule but now still yolo3 data generator # TODO: create new yolo5 data generator to apply YOLOv5 anchor assignment get_train_model = get_yolo5_train_model data_generator = yolo5_data_generator_wrapper # tf.keras.Sequence style data generator #train_data_generator = Yolo5DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval, args.multi_anchor_assign) #val_data_generator = Yolo5DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes, multi_anchor_assign=args.multi_anchor_assign) tiny_version = False elif args.model_type.startswith('yolo3_') or args.model_type.startswith( 'yolo4_'): #if num_anchors == 9: # YOLOv3 & v4 entrance, use 9 anchors get_train_model = get_yolo3_train_model data_generator = yolo3_data_generator_wrapper # tf.keras.Sequence style data generator #train_data_generator = Yolo3DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval, args.multi_anchor_assign) #val_data_generator = Yolo3DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes, multi_anchor_assign=args.multi_anchor_assign) tiny_version = False elif args.model_type.startswith( 'tiny_yolo3_') or args.model_type.startswith('tiny_yolo4_'): #elif num_anchors == 6: # Tiny YOLOv3 & v4 entrance, use 6 anchors get_train_model = get_yolo3_train_model data_generator = yolo3_data_generator_wrapper # tf.keras.Sequence style data generator #train_data_generator = 
Yolo3DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval, args.multi_anchor_assign) #val_data_generator = Yolo3DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes, multi_anchor_assign=args.multi_anchor_assign) tiny_version = True elif args.model_type.startswith('yolo2_') or args.model_type.startswith( 'tiny_yolo2_'): #elif num_anchors == 5: # YOLOv2 & Tiny YOLOv2 use 5 anchors get_train_model = get_yolo2_train_model data_generator = yolo2_data_generator_wrapper # tf.keras.Sequence style data generator #train_data_generator = Yolo2DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval) #val_data_generator = Yolo2DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes) tiny_version = False else: raise ValueError('Unsupported model type') # prepare online evaluation callback if args.eval_online: eval_callback = EvalCallBack( args.model_type, dataset[num_train:], anchors, class_names, args.model_image_size, args.model_pruning, log_dir, eval_epoch_interval=args.eval_epoch_interval, save_eval_checkpoint=args.save_eval_checkpoint, elim_grid_sense=args.elim_grid_sense) callbacks.append(eval_callback) # prepare train/val data shuffle callback if args.data_shuffle: shuffle_callback = DatasetShuffleCallBack(dataset) callbacks.append(shuffle_callback) # prepare model pruning config pruning_end_step = np.ceil(1.0 * num_train / args.batch_size).astype( np.int32) * args.total_epoch if args.model_pruning: pruning_callbacks = [ sparsity.UpdatePruningStep(), sparsity.PruningSummaries(log_dir=log_dir, profile_batch=0) ] callbacks = callbacks + pruning_callbacks # prepare optimizer optimizer = get_optimizer(args.optimizer, args.learning_rate, average_type=None, decay_type=None) # support multi-gpu training if args.gpu_num >= 2: # devices_list=["/gpu:0", "/gpu:1"] devices_list = ["/gpu:{}".format(n) for n in range(args.gpu_num)] strategy = tf.distribute.MirroredStrategy(devices=devices_list) print('Number of devices: {}'.format(strategy.num_replicas_in_sync)) with strategy.scope(): # get multi-gpu train model model = get_train_model(args.model_type, anchors, num_classes, weights_path=args.weights_path, freeze_level=freeze_level, optimizer=optimizer, label_smoothing=args.label_smoothing, elim_grid_sense=args.elim_grid_sense, model_pruning=args.model_pruning, pruning_end_step=pruning_end_step) else: # get normal train model model = get_train_model(args.model_type, anchors, num_classes, weights_path=args.weights_path, freeze_level=freeze_level, optimizer=optimizer, label_smoothing=args.label_smoothing, elim_grid_sense=args.elim_grid_sense, model_pruning=args.model_pruning, pruning_end_step=pruning_end_step) model.summary() # Transfer training some epochs with frozen layers first if needed, to get a stable loss. initial_epoch = args.init_epoch epochs = initial_epoch + args.transfer_epoch print("Transfer training stage") print( 'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.' 
.format(num_train, num_val, args.batch_size, input_shape)) #model.fit_generator(train_data_generator, model.fit_generator( data_generator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval, multi_anchor_assign=args.multi_anchor_assign), steps_per_epoch=max(1, num_train // args.batch_size), #validation_data=val_data_generator, validation_data=data_generator( dataset[num_train:], args.batch_size, input_shape, anchors, num_classes, multi_anchor_assign=args.multi_anchor_assign), validation_steps=max(1, num_val // args.batch_size), epochs=epochs, initial_epoch=initial_epoch, #verbose=1, workers=1, use_multiprocessing=False, max_queue_size=10, callbacks=callbacks) # Wait 2 seconds for next stage time.sleep(2) if args.decay_type or args.average_type: # rebuild optimizer to apply learning rate decay or weights averager, # only after unfreeze all layers if args.decay_type: callbacks.remove(reduce_lr) if args.average_type == 'ema' or args.average_type == 'swa': # weights averager need tensorflow-addons, # which request TF 2.x and have version compatibility import tensorflow_addons as tfa callbacks.remove(checkpoint) avg_checkpoint = tfa.callbacks.AverageModelCheckpoint( filepath=os.path.join( log_dir, 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'), update_weights=True, monitor='val_loss', mode='min', verbose=1, save_weights_only=False, save_best_only=True, period=1) callbacks.append(avg_checkpoint) steps_per_epoch = max(1, num_train // args.batch_size) decay_steps = steps_per_epoch * (args.total_epoch - args.init_epoch - args.transfer_epoch) optimizer = get_optimizer(args.optimizer, args.learning_rate, average_type=args.average_type, decay_type=args.decay_type, decay_steps=decay_steps) # Unfreeze the whole network for further tuning # NOTE: more GPU memory is required after unfreezing the body print("Unfreeze and continue training, to fine-tune.") if args.gpu_num >= 2: with strategy.scope(): for i in range(len(model.layers)): model.layers[i].trainable = True model.compile(optimizer=optimizer, loss={ 'yolo_loss': lambda y_true, y_pred: y_pred }) # recompile to apply the change else: for i in range(len(model.layers)): model.layers[i].trainable = True model.compile(optimizer=optimizer, loss={ 'yolo_loss': lambda y_true, y_pred: y_pred }) # recompile to apply the change print( 'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.' .format(num_train, num_val, args.batch_size, input_shape)) #model.fit_generator(train_data_generator, model.fit_generator( data_generator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval, multi_anchor_assign=args.multi_anchor_assign), steps_per_epoch=max(1, num_train // args.batch_size), #validation_data=val_data_generator, validation_data=data_generator( dataset[num_train:], args.batch_size, input_shape, anchors, num_classes, multi_anchor_assign=args.multi_anchor_assign), validation_steps=max(1, num_val // args.batch_size), epochs=args.total_epoch, initial_epoch=epochs, #verbose=1, workers=1, use_multiprocessing=False, max_queue_size=10, callbacks=callbacks) # Finally store model if args.model_pruning: model = sparsity.strip_pruning(model) model.save(os.path.join(log_dir, 'trained_final.h5'))
def main(args): annotation_file = args.annotation_file classes_path = args.classes_path class_names = get_classes(classes_path) num_classes = len(class_names) anchors = get_anchors(args.anchors_path) num_anchors = len(anchors) log_dir_path = args.log_directory try: log_dir = os.path.join('logs', log_dir_path) except TypeError: date_now = datetime.now() log_dir_folder_name = f'{date_now.strftime("%Y_%m_%d_%H%M%S")}_{args.model_type}_TransferEp_{args.transfer_epoch}_TotalEP_{args.total_epoch}' log_dir = os.path.realpath(os.path.join( 'logs', log_dir_folder_name )) # get freeze level according to CLI option if args.weights_path: freeze_level = 0 else: freeze_level = 1 if args.freeze_level is not None: freeze_level = args.freeze_level # How many percentage of layers to unfreeze in fine tuning unfreeze_level = args.unfreeze_level # callbacks for training process logging = TensorBoard(log_dir=log_dir, histogram_freq=0, write_graph=False, write_grads=False, write_images=False, update_freq='batch') checkpoint = ModelCheckpoint( filepath=log_dir + os.sep + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5', monitor='val_loss', mode='min', verbose=1, save_weights_only=False, save_best_only=True, period=1 ) reduce_lr = ReduceLROnPlateau( monitor='val_loss', factor=0.5, mode='min', patience=10, verbose=1, cooldown=0, min_lr=1e-10 ) early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=50, verbose=1, mode='min') terminate_on_nan = TerminateOnNaN() callbacks = [logging, checkpoint, reduce_lr, early_stopping, terminate_on_nan] # get train&val dataset dataset = get_dataset(annotation_file) if args.val_annotation_file: val_dataset = get_dataset(args.val_annotation_file) num_train = len(dataset) num_val = len(val_dataset) dataset.extend(val_dataset) else: val_split = args.val_split num_val = int(len(dataset) * val_split) num_train = len(dataset) - num_val # assign multiscale interval if args.multiscale: rescale_interval = args.rescale_interval else: rescale_interval = -1 # Doesn't rescale # model input shape check input_shape = args.model_image_size assert (input_shape[0] % 32 == 0 and input_shape[1] % 32 == 0), 'model_image_size should be multiples of 32' # get different model type & train&val data generator if num_anchors == 9: # YOLOv3 use 9 anchors get_train_model = get_yolo3_train_model data_generator = yolo3_data_generator_wrapper # tf.keras.Sequence style data generator # train_data_generator = Yolo3DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval, args.multi_anchor_assign) # val_data_generator = Yolo3DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes, multi_anchor_assign=args.multi_anchor_assign) tiny_version = False elif num_anchors == 6: # Tiny YOLOv3 use 6 anchors get_train_model = get_yolo3_train_model data_generator = yolo3_data_generator_wrapper # tf.keras.Sequence style data generator # train_data_generator = Yolo3DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval, args.multi_anchor_assign) # val_data_generator = Yolo3DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes, multi_anchor_assign=args.multi_anchor_assign) tiny_version = True elif num_anchors == 5: # YOLOv2 use 5 anchors get_train_model = get_yolo2_train_model data_generator = yolo2_data_generator_wrapper # tf.keras.Sequence style data generator # train_data_generator = 
Yolo2DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval) # val_data_generator = Yolo2DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes) tiny_version = False else: raise ValueError('Unsupported anchors number') # prepare online evaluation callback if args.eval_online: eval_callback = EvalCallBack( model_type=args.model_type, annotation_lines=dataset[num_train:], anchors=anchors, class_names=class_names, model_image_size=args.model_image_size, model_pruning=args.model_pruning, log_dir=log_dir, eval_epoch_interval=args.eval_epoch_interval, save_eval_checkpoint=args.save_eval_checkpoint, elim_grid_sense=args.elim_grid_sense ) callbacks.append(eval_callback) # prepare train/val data shuffle callback if args.data_shuffle: shuffle_callback = DatasetShuffleCallBack(dataset) callbacks.append(shuffle_callback) # prepare model pruning config pruning_end_step = np.ceil(1.0 * num_train / args.batch_size).astype(np.int32) * args.total_epoch if args.model_pruning: pruning_callbacks = [sparsity.UpdatePruningStep(), sparsity.PruningSummaries(log_dir=log_dir, profile_batch=0)] callbacks = callbacks + pruning_callbacks # prepare optimizer optimizer = get_optimizer(args.optimizer, args.learning_rate, decay_type=None) # support multi-gpu training if args.gpu_num >= 2: # devices_list=["/gpu:0", "/gpu:1"] devices_list = ["/gpu:{}".format(n) for n in range(args.gpu_num)] strategy = tf.distribute.MirroredStrategy(devices=devices_list) print('Number of devices: {}'.format(strategy.num_replicas_in_sync)) with strategy.scope(): # get multi-gpu train model model = get_train_model( model_type=args.model_type, anchors=anchors, num_classes=num_classes, weights_path=args.weights_path, freeze_level=freeze_level, optimizer=optimizer, label_smoothing=args.label_smoothing, elim_grid_sense=args.elim_grid_sense, model_pruning=args.model_pruning, pruning_end_step=pruning_end_step ) else: # get normal train model model = get_train_model( model_type=args.model_type, anchors=anchors, num_classes=num_classes, weights_path=args.weights_path, freeze_level=freeze_level, optimizer=optimizer, label_smoothing=args.label_smoothing, elim_grid_sense=args.elim_grid_sense, model_pruning=args.model_pruning, pruning_end_step=pruning_end_step ) if args.show_history: model.summary() layers_count = len(model.layers) print(f'Total layers: {layers_count}') # Transfer training some epochs with frozen layers first if needed, to get a stable loss. 
initial_epoch = args.init_epoch epochs = initial_epoch + args.transfer_epoch print("Transfer training stage") print('Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'.format(num_train, num_val, args.batch_size, input_shape)) # model.fit_generator(train_data_generator, """ Transfer training steps, train with freeze layers """ model.fit( data_generator( annotation_lines=dataset[:num_train], batch_size=args.batch_size, input_shape=input_shape, anchors=anchors, num_classes=num_classes, enhance_augment=args.enhance_augment, rescale_interval=rescale_interval, multi_anchor_assign=args.multi_anchor_assign ), steps_per_epoch=max(1, num_train // args.batch_size), # validation_data=val_data_generator, validation_data=data_generator( annotation_lines=dataset[num_train:], batch_size=args.batch_size, input_shape=input_shape, anchors=anchors, num_classes=num_classes, multi_anchor_assign=args.multi_anchor_assign ), validation_steps=max(1, num_val // args.batch_size), epochs=epochs, initial_epoch=initial_epoch, # verbose=1, workers=1, use_multiprocessing=False, max_queue_size=10, callbacks=callbacks ) # Wait 2 seconds for next stage time.sleep(2) if args.decay_type: # rebuild optimizer to apply learning rate decay, only after # unfreeze all layers callbacks.remove(reduce_lr) steps_per_epoch = max(1, num_train // args.batch_size) decay_steps = steps_per_epoch * (args.total_epoch - args.init_epoch - args.transfer_epoch) optimizer = get_optimizer(args.optimizer, args.learning_rate, decay_type=args.decay_type, decay_steps=decay_steps) # Unfreeze the whole network for further tuning # NOTE: more GPU memory is required after unfreezing the body fine_tune_layers = int(layers_count * unfreeze_level) print(f"Unfreeze {unfreeze_level * 100}% of layers and continue training, to fine-tune.") print(f"Unfroze {fine_tune_layers} layers of {layers_count}") if args.gpu_num >= 2: with strategy.scope(): for i in range(layers_count - fine_tune_layers, layers_count): model.layers[i].trainable = True model.compile(optimizer=optimizer, loss={'yolo_loss': lambda y_true, y_pred: y_pred}) # recompile to apply the change else: for i in range(layers_count - fine_tune_layers, layers_count): model.layers[i].trainable = True model.compile(optimizer=optimizer, loss={'yolo_loss': lambda y_true, y_pred: y_pred}) # recompile to apply the change print('Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'.format(num_train, num_val, args.batch_size, input_shape)) """ Fine-tuning steps, more memory will be used. 
LR (Learning Rate) will be decayed """ # model.fit_generator(train_data_generator, model.fit( # The YOLO data augmentation generator tool data_generator( annotation_lines=dataset[:num_train], batch_size=args.batch_size, input_shape=input_shape, anchors=anchors, num_classes=num_classes, enhance_augment=args.enhance_augment, rescale_interval=rescale_interval, multi_anchor_assign=args.multi_anchor_assign ), steps_per_epoch=max(1, num_train // args.batch_size), # validation_data=val_data_generator, # Validation generator validation_data=data_generator( annotation_lines=dataset[num_train:], batch_size=args.batch_size, input_shape=input_shape, anchors=anchors, num_classes=num_classes, multi_anchor_assign=args.multi_anchor_assign ), validation_steps=max(1, num_val // args.batch_size), epochs=args.total_epoch, initial_epoch=epochs, # verbose=1, workers=1, use_multiprocessing=False, max_queue_size=10, callbacks=callbacks ) # Finally store model if args.model_pruning: model = sparsity.strip_pruning(model) model.save(os.path.join(log_dir, 'trained_final.h5'))
                                                 end_step=end_step,
                                                 frequency=200)
}

# Assign pruning parameters
pruned_model = sparsity.prune_low_magnitude(model, **pruning_params)
# Print the converted model
pruned_model.summary()

pruned_model.compile(loss='binary_crossentropy',
                     optimizer=tf.keras.optimizers.SGD(lr=0.001),
                     metrics=['acc'])

callbacks = [
    sparsity.UpdatePruningStep(),
    sparsity.PruningSummaries(log_dir='./', profile_batch=0)
]

print('[INFO] Start pruning process...')
pruned_model.fit(train_generator,
                 steps_per_epoch=len(train_generator),
                 callbacks=callbacks,
                 epochs=epochs,
                 validation_data=validation_generator,
                 validation_steps=len(validation_generator))

pruned_model_path = './models/pruned_MobileNetv2.h5'

# Strip the pruning wrappers to convert the pruned model back to a plain Keras model
final_model = sparsity.strip_pruning(pruned_model)
def main(args): #데이터 annotation 파일 경로 annotation_file = args.annotation_file # 결과 log 및 weight가 저장될 경로 log_dir = os.path.join('logs', '000') #클래스 파일 경로 classes_path = args.classes_path class_names = get_classes(classes_path) num_classes = len(class_names) # anchors 받아오는 라인 anchors = get_anchors(args.anchors_path) num_anchors = len(anchors) # get freeze level according to CLI option if args.weights_path: freeze_level = 0 else: freeze_level = 1 if args.freeze_level is not None: freeze_level = args.freeze_level # callbacks for training process logging = TensorBoard(log_dir=log_dir, histogram_freq=0, write_graph=False, write_grads=False, write_images=False, update_freq='batch') checkpoint = ModelCheckpoint(os.path.join( log_dir, 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'), monitor='val_loss', mode='min', verbose=1, save_weights_only=False, save_best_only=True, period=1) reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, mode='min', patience=10, verbose=1, cooldown=0, min_lr=1e-10) early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=50, verbose=1, mode='min') terminate_on_nan = TerminateOnNaN() callbacks = [ logging, checkpoint, reduce_lr, early_stopping, terminate_on_nan ] # 데이터셋 로딩 dataset = get_dataset(annotation_file) if args.val_annotation_file: val_dataset = get_dataset(args.val_annotation_file) num_train = len(dataset) num_val = len(val_dataset) dataset.extend(val_dataset) else: val_split = args.val_split num_val = int(len(dataset) * val_split) num_train = len(dataset) - num_val # assign multiscale interval if args.multiscale: rescale_interval = args.rescale_interval else: rescale_interval = -1 #Doesn't rescale # model input shape check input_shape = args.model_image_size assert (input_shape[0] % 32 == 0 and input_shape[1] % 32 == 0), 'model_image_size should be multiples of 32' # 모델종류에 따른 data generator 및 모델 생성 if num_anchors == 9: # YOLOv3 use 9 anchors get_train_model = get_yolo3_train_model data_generator = yolo3_data_generator_wrapper tiny_version = False elif num_anchors == 6: # Tiny YOLOv3 use 6 anchors get_train_model = get_yolo3_train_model data_generator = yolo3_data_generator_wrapper tiny_version = True elif num_anchors == 5: # YOLOv2 use 5 anchors get_train_model = get_yolo2_train_model data_generator = yolo2_data_generator_wrapper tiny_version = False else: raise ValueError('Unsupported anchors number') # prepare online evaluation callback if args.eval_online: eval_callback = EvalCallBack( args.model_type, dataset[num_train:], anchors, class_names, args.model_image_size, args.model_pruning, log_dir, eval_epoch_interval=args.eval_epoch_interval, save_eval_checkpoint=args.save_eval_checkpoint, elim_grid_sense=args.elim_grid_sense) callbacks.append(eval_callback) # prepare train/val data shuffle callback if args.data_shuffle: shuffle_callback = DatasetShuffleCallBack(dataset) callbacks.append(shuffle_callback) # prepare model pruning config pruning_end_step = np.ceil(1.0 * num_train / args.batch_size).astype( np.int32) * args.total_epoch if args.model_pruning: pruning_callbacks = [ sparsity.UpdatePruningStep(), sparsity.PruningSummaries(log_dir=log_dir, profile_batch=0) ] callbacks = callbacks + pruning_callbacks # prepare optimizer optimizer = get_optimizer(args.optimizer, args.learning_rate, decay_type=None) # support multi-gpu training if args.gpu_num >= 2: # devices_list=["/gpu:0", "/gpu:1"] devices_list = ["/gpu:{}".format(n) for n in range(args.gpu_num)] strategy = tf.distribute.MirroredStrategy(devices=devices_list) 
print('Number of devices: {}'.format(strategy.num_replicas_in_sync)) with strategy.scope(): # get multi-gpu train model model = get_train_model(args.model_type, anchors, num_classes, weights_path=args.weights_path, freeze_level=freeze_level, optimizer=optimizer, label_smoothing=args.label_smoothing, elim_grid_sense=args.elim_grid_sense, model_pruning=args.model_pruning, pruning_end_step=pruning_end_step) else: # get normal train model model = get_train_model(args.model_type, anchors, num_classes, weights_path=args.weights_path, freeze_level=freeze_level, optimizer=optimizer, label_smoothing=args.label_smoothing, elim_grid_sense=args.elim_grid_sense, model_pruning=args.model_pruning, pruning_end_step=pruning_end_step) model.summary() # Transfer training some epochs with frozen layers first if needed, to get a stable loss. initial_epoch = args.init_epoch epochs = initial_epoch + args.transfer_epoch print("Transfer training stage") print( 'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.' .format(num_train, num_val, args.batch_size, input_shape)) # 성능향상을 위해 초반 일부 epoch은 Transfer Learning 진행 (Initial Epoch ~ Transfer Epoch) model.fit_generator( data_generator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval, multi_anchor_assign=args.multi_anchor_assign), steps_per_epoch=max(1, num_train // args.batch_size), #validation_data=val_data_generator, validation_data=data_generator( dataset[num_train:], args.batch_size, input_shape, anchors, num_classes, multi_anchor_assign=args.multi_anchor_assign), validation_steps=max(1, num_val // args.batch_size), epochs=epochs, initial_epoch=initial_epoch, #verbose=1, workers=1, use_multiprocessing=False, max_queue_size=10, callbacks=callbacks) # Wait 2 seconds for next stage time.sleep(2) if args.decay_type: # rebuild optimizer to apply learning rate decay, only after # unfreeze all layers callbacks.remove(reduce_lr) steps_per_epoch = max(1, num_train // args.batch_size) decay_steps = steps_per_epoch * (args.total_epoch - args.init_epoch - args.transfer_epoch) optimizer = get_optimizer(args.optimizer, args.learning_rate, decay_type=args.decay_type, decay_steps=decay_steps) # Unfreeze the whole network for further tuning # NOTE: more GPU memory is required after unfreezing the body print("Unfreeze and continue training, to fine-tune.") if args.gpu_num >= 2: with strategy.scope(): for i in range(len(model.layers)): model.layers[i].trainable = True model.compile(optimizer=optimizer, loss={ 'yolo_loss': lambda y_true, y_pred: y_pred }) # recompile to apply the change else: for i in range(len(model.layers)): model.layers[i].trainable = True model.compile(optimizer=optimizer, loss={ 'yolo_loss': lambda y_true, y_pred: y_pred }) # recompile to apply the change print( 'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.' 
.format(num_train, num_val, args.batch_size, input_shape)) # Transfer Learning 이후 나머지 Epoch에 대하여 학습 진행 (Transfer Epoch ~ Total Epoch) # 이 부분이 필요없거나 학습 시간이 너무 오래 걸릴 경우 Total Epoch을 Transfer와 동일하게 두고, 아래 학습을 진행하지 않고 넘어갈 수 있음 # 본인 컴퓨터 사양에 맞춰서 진행 model.fit_generator( data_generator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval, multi_anchor_assign=args.multi_anchor_assign), steps_per_epoch=max(1, num_train // args.batch_size), #validation_data=val_data_generator, validation_data=data_generator( dataset[num_train:], args.batch_size, input_shape, anchors, num_classes, multi_anchor_assign=args.multi_anchor_assign), validation_steps=max(1, num_val // args.batch_size), epochs=args.total_epoch, initial_epoch=epochs, #verbose=1, workers=1, use_multiprocessing=False, max_queue_size=10, callbacks=callbacks) # Finally store model if args.model_pruning: model = sparsity.strip_pruning(model) model.save(os.path.join(log_dir, 'trained_final.h5'))
def main(args):
    annotation_file = args.annotation_file
    log_dir = os.path.join('logs', '000')
    classes_path = args.classes_path
    class_names = get_classes(classes_path)
    num_classes = len(class_names)
    print('classes_path =', classes_path)
    print('class_names = ', class_names)
    print('num_classes = ', num_classes)

    anchors = get_anchors(args.anchors_path)
    num_anchors = len(anchors)

    # get freeze level according to CLI option
    if args.weights_path:
        freeze_level = 0
    else:
        freeze_level = 1
    if args.freeze_level is not None:
        freeze_level = args.freeze_level

    # callbacks for training process
    logging = TensorBoard(log_dir=log_dir, histogram_freq=0,
                          write_graph=False, write_grads=False,
                          write_images=False, update_freq='batch')
    checkpoint = ModelCheckpoint(os.path.join(
        log_dir, 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'),
        monitor='val_loss', verbose=1, save_weights_only=False,
        save_best_only=True, period=1)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10,
                                  verbose=1, cooldown=0, min_lr=1e-10)
    early_stopping = EarlyStopping(monitor='val_loss', min_delta=0,
                                   patience=50, verbose=1)
    terminate_on_nan = TerminateOnNaN()

    callbacks = [
        logging, checkpoint, reduce_lr, early_stopping, terminate_on_nan
    ]

    # get train&val dataset
    dataset = get_dataset(annotation_file)
    if args.val_annotation_file:
        val_dataset = get_dataset(args.val_annotation_file)
        num_train = len(dataset)
        print('num_train = ', num_train)
        num_val = len(val_dataset)
        dataset.extend(val_dataset)
    else:
        val_split = args.val_split
        num_val = int(len(dataset) * val_split)
        num_train = len(dataset) - num_val

    # assign multiscale interval
    if args.multiscale:
        rescale_interval = args.rescale_interval
    else:
        rescale_interval = -1  # Doesn't rescale

    # model input shape check
    input_shape = args.model_image_size
    assert (input_shape[0] % 32 == 0 and input_shape[1] % 32 == 0), 'Multiples of 32 required'

    # get different model type & train&val data generator
    if num_anchors == 9:
        # YOLOv3 use 9 anchors
        get_train_model = get_yolo3_train_model
        data_generator = yolo3_data_generator_wrapper

        # tf.keras.Sequence style data generator
        #train_data_generator = Yolo3DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval)
        #val_data_generator = Yolo3DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes)
        tiny_version = False
    elif num_anchors == 6:
        # Tiny YOLOv3 use 6 anchors
        get_train_model = get_yolo3_train_model
        data_generator = yolo3_data_generator_wrapper

        # tf.keras.Sequence style data generator
        #train_data_generator = Yolo3DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval)
        #val_data_generator = Yolo3DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes)
        tiny_version = True
    elif num_anchors == 5:
        # YOLOv2 use 5 anchors
        get_train_model = get_yolo2_train_model
        data_generator = yolo2_data_generator_wrapper

        # tf.keras.Sequence style data generator
        #train_data_generator = Yolo2DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval)
        #val_data_generator = Yolo2DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes)
        tiny_version = False
    else:
        raise ValueError('Unsupported anchors number')

    # prepare online evaluation callback
    if args.eval_online:
        eval_callback = EvalCallBack(
            args.model_type, dataset[num_train:], anchors, class_names,
            args.model_image_size, args.model_pruning, log_dir,
            eval_epoch_interval=args.eval_epoch_interval,
            save_eval_checkpoint=args.save_eval_checkpoint)
        callbacks.append(eval_callback)

    # prepare train/val data shuffle callback
    if args.data_shuffle:
        shuffle_callback = DatasetShuffleCallBack(dataset)
        callbacks.append(shuffle_callback)

    # prepare model pruning config
    pruning_end_step = np.ceil(1.0 * num_train / args.batch_size).astype(
        np.int32) * args.total_epoch
    if args.model_pruning:
        pruning_callbacks = [
            sparsity.UpdatePruningStep(),
            sparsity.PruningSummaries(log_dir=log_dir, profile_batch=0)
        ]
        callbacks = callbacks + pruning_callbacks

    # prepare optimizer
    optimizer = get_optimizer(args.optimizer, args.learning_rate, decay_type=None)

    # get train model
    model = get_train_model(args.model_type, anchors, num_classes,
                            weights_path=args.weights_path,
                            freeze_level=freeze_level,
                            optimizer=optimizer,
                            label_smoothing=args.label_smoothing,
                            model_pruning=args.model_pruning,
                            pruning_end_step=pruning_end_step)

    # support multi-gpu training
    template_model = None
    if args.gpu_num >= 2:
        # keep the template model for saving result
        template_model = model
        model = multi_gpu_model(model, gpus=args.gpu_num)
        # recompile multi gpu model
        model.compile(optimizer=optimizer,
                      loss={'yolo_loss': lambda y_true, y_pred: y_pred})
    model.summary()

    # Transfer training some epochs with frozen layers first if needed, to get a stable loss.
    initial_epoch = args.init_epoch
    epochs = initial_epoch + args.transfer_epoch
    print("Transfer training stage")
    print('Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'
          .format(num_train, num_val, args.batch_size, input_shape))
    #model.fit_generator(train_data_generator,
    model.fit_generator(
        data_generator(dataset[:num_train], args.batch_size, input_shape,
                       anchors, num_classes, args.enhance_augment),
        steps_per_epoch=max(1, num_train // args.batch_size),
        #validation_data=val_data_generator,
        validation_data=data_generator(dataset[num_train:], args.batch_size,
                                       input_shape, anchors, num_classes),
        validation_steps=max(1, num_val // args.batch_size),
        epochs=epochs,
        initial_epoch=initial_epoch,
        #verbose=1,
        workers=1,
        use_multiprocessing=False,
        max_queue_size=10,
        callbacks=callbacks)

    # Wait 2 seconds for next stage
    time.sleep(2)

    if args.decay_type:
        # rebuild optimizer to apply learning rate decay, only after
        # unfreeze all layers
        callbacks.remove(reduce_lr)
        steps_per_epoch = max(1, num_train // args.batch_size)
        decay_steps = steps_per_epoch * (args.total_epoch - args.init_epoch -
                                         args.transfer_epoch)
        optimizer = get_optimizer(args.optimizer, args.learning_rate,
                                  decay_type=args.decay_type,
                                  decay_steps=decay_steps)

    # Unfreeze the whole network for further tuning
    # NOTE: more GPU memory is required after unfreezing the body
    print("Unfreeze and continue training, to fine-tune.")
    for i in range(len(model.layers)):
        model.layers[i].trainable = True
    # recompile to apply the change
    model.compile(optimizer=optimizer,
                  loss={'yolo_loss': lambda y_true, y_pred: y_pred})

    print('Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'
          .format(num_train, num_val, args.batch_size, input_shape))
    #model.fit_generator(train_data_generator,
    model.fit_generator(
        data_generator(dataset[:num_train], args.batch_size, input_shape,
                       anchors, num_classes, args.enhance_augment,
                       rescale_interval),
        steps_per_epoch=max(1, num_train // args.batch_size),
        #validation_data=val_data_generator,
        validation_data=data_generator(dataset[num_train:], args.batch_size,
                                       input_shape, anchors, num_classes),
        validation_steps=max(1, num_val // args.batch_size),
        epochs=args.total_epoch,
        initial_epoch=epochs,
        #verbose=1,
        workers=1,
        use_multiprocessing=False,
        max_queue_size=10,
        callbacks=callbacks)

    # Finally store model
    if args.model_pruning:
        if template_model is not None:
            template_model = sparsity.strip_pruning(template_model)
        else:
            model = sparsity.strip_pruning(model)

    if template_model is not None:
        template_model.save(os.path.join(log_dir, 'trained_final.h5'))
    else:
        model.save(os.path.join(log_dir, 'trained_final.h5'))
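# Stripping the pruning wrappers does not shrink the .h5 file on its own; the zeroed
# weights only pay off once the file is compressed or converted to a sparse format.
# Below is a minimal sketch, not part of the original script; the gzipped_size name
# and both file paths are assumptions for illustration. It compares the gzipped size
# of a baseline and a pruned model file using only the standard library.
import gzip
import os
import shutil

def gzipped_size(h5_path):
    # Write a temporary gzip copy of the file and report its size in bytes.
    gz_path = h5_path + '.gz'
    with open(h5_path, 'rb') as f_in, gzip.open(gz_path, 'wb') as f_out:
        shutil.copyfileobj(f_in, f_out)
    size = os.path.getsize(gz_path)
    os.remove(gz_path)
    return size

# Example usage (hypothetical paths):
# print('baseline model:', gzipped_size('logs/000/trained_baseline.h5'))
# print('pruned model  :', gzipped_size('logs/000/trained_final.h5'))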
        TopKCategoricalAccuracy(k=5, name='top5_accuracy')
    ])

# Define callbacks
model_folder = os.path.dirname(model_path)
model_filename = os.path.basename(model_path)
output_filename = model_filename[:model_filename.index('.hdf5')] + '_prune' + str(sparsity_target) + '.hdf5'
checkpoint = ModelCheckpoint(filepath=os.path.join(model_folder, output_filename),
                             save_best_only=False,
                             monitor='val_accuracy',
                             save_weights_only=False,
                             verbose=0)
#early_stop = EarlyStopping(monitor = 'val_accuracy', patience = 20)
pruning_step = sparsity.UpdatePruningStep()
log_folder = os.path.join(model_folder, 'logs/')
if not os.path.exists(log_folder):
    os.mkdir(log_folder)
print('Writing training logs to ' + log_folder)
pruning_summary = sparsity.PruningSummaries(log_dir=log_folder, profile_batch=0)

# Fine-tune model
#model_history = model.fit(augmented_train_batches, epochs = 5, validation_data = validation_batches, callbacks = [checkpoint, early_stop, pruning_step, pruning_summary])
model_history = pruned_model.fit(
    train_gen.flow(X_train, y_train, shuffle=True, batch_size=BATCH_SIZE),
    steps_per_epoch=num_train_examples // BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=dev_gen.flow(X_dev, y_dev,