def trainNetwork(params, tracesTrain, tracesVal, labelsTrain, labelsVal, preprocesfunc):
    mod, nb_epochs, learning_rate, batch_size, one_cycle_lr = unpackParams(params)

    K.clear_session()
    model = mod(input_size=tracesTrain.shape[1], learning_rate=learning_rate)
    model.summary()

    # Ensure the data is in the right shape for the model's input layer
    input_layer_shape = model.get_layer(index=0).input_shape
    if len(input_layer_shape) == 2:
        tracesTrain_shaped = tracesTrain
        tracesVal_shaped = tracesVal
    elif len(input_layer_shape) == 3:
        tracesTrain_shaped = tracesTrain.reshape((tracesTrain.shape[0], tracesTrain.shape[1], 1))
        tracesVal_shaped = tracesVal.reshape((tracesVal.shape[0], tracesVal.shape[1], 1))

    modelpath = './../models/' + mod.__name__ + preprocesfunc + '.hdf5'
    print('Training model:', modelpath)
    checkpoint = ModelCheckpoint(modelpath, verbose=0, save_best_only=False)

    if one_cycle_lr:
        print('During training we will make use of the One Cycle learning rate policy.')
        lr_manager = OneCycleLR(max_lr=learning_rate,
                                end_percentage=0.2,
                                scale_percentage=0.1,
                                maximum_momentum=None,
                                minimum_momentum=None,
                                verbose=False)
        callbacks = [checkpoint, lr_manager]
    else:
        callbacks = [checkpoint]

    history = model.fit(x=tracesTrain_shaped,
                        y=to_categorical(labelsTrain, num_classes=256),
                        validation_data=(tracesVal_shaped, to_categorical(labelsVal, num_classes=256)),
                        batch_size=batch_size,
                        verbose=0,
                        epochs=nb_epochs,
                        callbacks=callbacks)
    return history
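# A hypothetical invocation sketch, shown only to illustrate the expected argument
# order. It assumes unpackParams yields (model builder, epochs, lr, batch size,
# one-cycle flag) from `params`; the 'MinMax' preprocessing label and the trace/label
# arrays are placeholders, not names taken from the original script:
#
# history = trainNetwork(params, tracesTrain, tracesVal, labelsTrain, labelsVal, 'MinMax')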
def OneCycleTrain(squeeze_scale_exp, small_filter_rate, max_lr_exp, max_momentum, num_epoch):
    scale = 10 ** squeeze_scale_exp
    max_lr = 10 ** max_lr_exp
    num_epoch = int(num_epoch)
    batch_size = 2000

    with open('data.p', 'rb') as f:
        (X_train, y_train), (X_test, y_test) = pickle.load(f)
    num_samples = len(X_train)

    op = tf.keras.optimizers.SGD(momentum=max_momentum - 0.05, nesterov=True)
    model = squeeze_net(small_filter_rate=small_filter_rate, squeeze_scale=scale, verbose=False)
    loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

    lr_manager = OneCycleLR(max_lr, maximum_momentum=max_momentum, minimum_momentum=max_momentum - 0.1)
    stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_acc', patience=5, verbose=1)
    stop_to_avoid_divergence = Avoid_Divergence(
        random_accuracy=1. / float(max(y_train)[0] + 1),
        num_batch=num_samples / float(batch_size))

    oh = OneHotEncoder(sparse=False)
    oh.fit(y_train)

    model.compile(loss=loss, optimizer=op, metrics=['acc'])

    train_data_generator = ClassBalancedBatchGenerator(X_train / 255., oh.transform(y_train), batch_size)
    history = model.fit_generator(
        train_data_generator,
        steps_per_epoch=int(num_samples / batch_size),
        epochs=num_epoch,
        validation_data=(X_test / 255., oh.transform(y_test)),
        callbacks=[lr_manager, stop_early, stop_to_avoid_divergence],
        shuffle=False,
        use_multiprocessing=True,
        workers=4)

    # If training was stopped before any validation pass completed, report a sentinel value
    try:
        final_val_acc = history.history['val_acc'][-1]
    except (KeyError, IndexError):
        final_val_acc = -1e13

    del model
    gc.collect()
    tf.keras.backend.clear_session()
    return final_val_acc
def train_model(X_profiling, Y_profiling, X_test, Y_test, model, save_file_name,
                epochs=150, batch_size=100, max_lr=1e-3):
    check_file_exists(os.path.dirname(save_file_name))
    # Save model every epoch
    save_model = ModelCheckpoint(save_file_name)

    # Get the input layer shape
    input_layer_shape = model.get_layer(index=0).input_shape
    # Sanity check: the model input must match the trace length
    if input_layer_shape[1] != len(X_profiling[0]):
        print("Error: model input shape %d does not match the trace length %d ..." %
              (input_layer_shape[1], len(X_profiling[0])))
        sys.exit(-1)

    Reshaped_X_profiling = X_profiling.reshape((X_profiling.shape[0], X_profiling.shape[1], 1))
    Reshaped_X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

    # One Cycle Policy
    lr_manager = OneCycleLR(max_lr=max_lr,
                            end_percentage=0.2,
                            scale_percentage=0.1,
                            maximum_momentum=None,
                            minimum_momentum=None,
                            verbose=True)
    callbacks = [save_model, lr_manager]

    history = model.fit(x=Reshaped_X_profiling,
                        y=to_categorical(Y_profiling, num_classes=256),
                        validation_data=(Reshaped_X_test, to_categorical(Y_test, num_classes=256)),
                        batch_size=batch_size,
                        verbose=1,
                        epochs=epochs,
                        callbacks=callbacks)
    return history
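# A possible call to train_model, given only as a sketch; the cnn_best() builder,
# the save path, and the trace arrays are placeholders rather than names from the
# original script:
#
# model = cnn_best(input_dim=X_profiling.shape[1])
# history = train_model(X_profiling, Y_profiling, X_test, Y_test, model,
#                       save_file_name='./models/cnn_onecycle.h5',
#                       epochs=150, batch_size=100, max_lr=1e-3)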
X_test = X_test.astype('float32')

# preprocess input: per-channel standardization
mean = np.mean(X_train, axis=(0, 1, 2), keepdims=True).astype('float32')
std = np.std(X_train, axis=(0, 1, 2), keepdims=True).astype('float32')
print("Channel Mean : ", mean)
print("Channel Std : ", std)
X_train = (X_train - mean) / std
X_test = (X_test - mean) / std

# Learning rate finder callback setup
num_samples = X_train.shape[0]
lr_manager = OneCycleLR(num_samples, nb_epoch, batch_size,
                        max_lr=0.02, maximum_momentum=0.9, verbose=True)

# For training, the auxiliary branch must be used to correctly train NASNet
model = MiniMobileNetV2((img_rows, img_cols, img_channels), alpha=1.4,
                        weight_decay=1e-6, weights=None, classes=nb_classes)
model.summary()

# These values will be overridden by the above callback
optimizer = SGD(lr=0.002, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

model.load_weights(weights_file)

if not data_augmentation:
    print('Not using data augmentation.')
std = np.std(X_train, axis=(0, 1, 2), keepdims=True).astype('float32')
print("Channel Mean : ", mean)
print("Channel Std : ", std)
X_train = (X_train - mean) / std
X_test = (X_test - mean) / std

# Learning rate finder callback setup
num_samples = X_train.shape[0]

# When using the validation set for LRFinder, try out values starting from 2x
# the lr found there and move lower until it's good for the first few epochs
lr_manager = OneCycleLR(max_lr=0.025, end_percentage=0.2, scale_percentage=0.1,
                        maximum_momentum=0.95, verbose=True)

# For training, the auxiliary branch must be used to correctly train NASNet
model = MiniVGG((img_rows, img_cols, img_channels), weight_decay=1e-5,
                weights=None, classes=nb_classes)
model.summary()

# These values will be overridden by the above callback
optimizer = SGD(lr=0.0025, momentum=0.95, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
mean = np.mean(X_train, axis=(0, 1, 2), keepdims=True).astype('float32')
std = np.std(X_train, axis=(0, 1, 2), keepdims=True).astype('float32')
print("Channel Mean : ", mean)
print("Channel Std : ", std)
X_train = (X_train - mean) / std
X_test = (X_test - mean) / std

VGG16_model = VGG16Net(img_rows, img_cols, img_channels, 10, 1e-5)
VGG16_model.summary()
VGG16_model.compile(SGD(lr=0.0173, momentum=0.9, decay=0, nesterov=True),
                    loss='categorical_crossentropy', metrics=['accuracy'])

# clc = CyclicLR(base_lr=0.003, max_lr=0.1, step_size=5 * X_train.shape[0] // n_batch, mode='triangular')
clc = OneCycleLR(max_lr=0.15, end_percentage=0.1, verbose=True)
tb = TensorBoard(log_dir='./Graph/6', histogram_freq=0, write_graph=True, write_images=True)

if not data_aug:
    History = VGG16_model.fit(
        X_train, Y_train,
        batch_size=n_batch,
        epochs=n_epochs,
        validation_data=(X_test, Y_test),
        shuffle=True,
BATCH_SIZE = 500
MAX_LR = 0.1

# Data
X = np.random.rand(NUM_SAMPLES, 10)
Y = np.random.randint(0, 2, size=NUM_SAMPLES)

# Model
inp = Input(shape=(10,))
x = Dense(5, activation='relu')(inp)
x = Dense(1, activation='sigmoid')(x)
model = Model(inp, x)

# clr_triangular = OneCycleLR(NUM_SAMPLES, NUM_EPOCHS, BATCH_SIZE, MAX_LR, end_percentage=0.2, scale_percentage=0.2)
clr_triangular = OneCycleLR(max_lr=MAX_LR, end_percentage=0.2, scale_percentage=0.2)

model.compile(optimizer=SGD(0.1), loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X, Y, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS, callbacks=[clr_triangular], verbose=0)

print("LR Range : ", min(clr_triangular.history['lr']), max(clr_triangular.history['lr']))
# Convert class vectors to binary class matrices.
# preprocess input
# normalization

VGG16_model = VGG16Net(img_rows, img_cols, img_channels, 17, 1e-5)
VGG16_model.summary()
VGG16_model.compile(SGD(lr=0.00407, momentum=0.9, decay=0.00001, nesterov=False),
                    loss='categorical_crossentropy', metrics=['accuracy'])

# clc = CyclicLR(base_lr=0.00407, max_lr=0.01023, step_size=4 * X_train.shape[0] // n_batch, mode='triangular')
clc = OneCycleLR(max_lr=0.1023, maximum_momentum=0.9, end_percentage=0.1, verbose=True)

if not data_aug:
    History = VGG16_model.fit(
        X_train, Y_train,
        batch_size=n_batch,
        epochs=n_epochs,
        validation_data=(X_test, Y_test),
        shuffle=True,
        verbose=1,
        callbacks=[clc, LRTensorBoard(log_dir='./Graph/6')])
# Start training using the data augmentation generator
else:
NUM_EPOCHS = 100
BATCH_SIZE = 500
MAX_LR = 0.1
save_dir = './'

# Data
X = np.random.rand(NUM_SAMPLES, 10)
Y = np.random.randint(0, 2, size=NUM_SAMPLES)

# Model
inp = Input(shape=(10,))
x = Dense(5, activation='relu')(inp)
x = Dense(1, activation='sigmoid')(x)
model = Model(inp, x)

clr_triangular = OneCycleLR(NUM_SAMPLES, NUM_EPOCHS, BATCH_SIZE, MAX_LR,
                            end_percentage=0.2, scale_percentage=0.2)

model.compile(optimizer=SGD(0.1), loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X, Y, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS, callbacks=[clr_triangular], verbose=0)

print("LR Range : ", min(clr_triangular.history['lr']), max(clr_triangular.history['lr']))
print("Momentum Range : ", min(clr_triangular.history['momentum']), max(clr_triangular.history['momentum']))

plt.figure()
plt.xlabel('Training Iterations')
plt.ylabel('Learning Rate')
plt.title("CLR")
plt.plot(clr_triangular.history['lr'])
plt.show()
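# The momentum schedule recorded by the callback can be plotted the same way as the
# learning rate above; a small sketch, assuming the 'momentum' history printed above
# is populated (i.e. the optimizer exposes a momentum term):
plt.figure()
plt.xlabel('Training Iterations')
plt.ylabel('Momentum')
plt.title("CLR - Momentum")
plt.plot(clr_triangular.history['momentum'])
plt.show()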
BATCH_SIZE = 500
INITIAL_LR = 0.1

# Data
X = np.random.rand(NUM_SAMPLES, 10)
Y = np.random.randint(0, 2, size=NUM_SAMPLES)

# Model
inp = Input(shape=(10,))
x = Dense(5, activation='relu')(inp)
x = Dense(1, activation='sigmoid')(x)
model = Model(inp, x)

clr_triangular = OneCycleLR(NUM_SAMPLES, NUM_EPOCHS, BATCH_SIZE, INITIAL_LR,
                            end_percentage=0.1, scale_percentage=None)

model.compile(optimizer=SGD(0.1), loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X, Y, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS, callbacks=[clr_triangular], verbose=0)

plt.xlabel('Training Iterations')
def main(data_path=args['data_path'], train_from=train_from):
    train_gen, valid_gen = create_data_generator(data_path)

    # print('[INFO] serializing label binarizer...')
    # f = open(args['labelbin'], 'wb')
    # d = train_gen.class_indices
    # pickle.dump(d, f)

    if train_from == 'trained_weights':
        model = load_model_from_trained_weights(imagedims=IMAGE_DIMS,
                                                nb_classes=len(train_gen.class_indices),
                                                weights=args['weight_path'],
                                                freeze_until=freeze_until)
    elif train_from == 'trained_model':
        model = load_model_from_trained_model()
    else:
        model = load_models(imagedims=IMAGE_DIMS, nb_classes=len(train_gen.class_indices))

    print('[INFO] compiling model...')
    model.compile(loss="categorical_crossentropy", optimizer=OPT, metrics=["accuracy"])

    plot_model(model, to_file='../model_outputs/architecture.png',
               show_layer_names=True, show_shapes=True)

    checkpoint = ModelCheckpoint(filepath=args['save_model'], monitor='val_loss', verbose=0,
                                 save_best_only=True, save_weights_only=False, mode='auto', period=1)
    stop_early = EarlyStopping(monitor='val_loss', min_delta=.0, patience=40, verbose=0, mode='auto')
    lr_manager = OneCycleLR(NUM_SAMPLES, EPOCHS, BS, max_lr=0.001,
                            maximum_momentum=0.9, verbose=True)
    callbacks = [checkpoint, stop_early, lr_manager]

    H = model.fit_generator(
        train_gen,
        validation_data=valid_gen,
        epochs=EPOCHS,
        # steps_per_epoch=209,
        callbacks=callbacks,
        verbose=1)

    # Plot training/validation loss and accuracy
    plt.style.use("ggplot")
    plt.figure()
    plt.plot(H.history["loss"], label="train_loss")
    plt.plot(H.history["val_loss"], label="val_loss")
    plt.plot(H.history["acc"], label="train_acc")
    plt.plot(H.history["val_acc"], label="val_acc")
    plt.title("Training Loss and Accuracy")
    plt.xlabel("Epoch #")
    plt.ylabel("Loss/Accuracy")
    plt.legend(loc="upper left")
    plt.savefig('../model_outputs/acc_loss.png')

    df = pd.DataFrame.from_dict(H.history)
    df.to_csv('../model_outputs/hist.csv', encoding='utf-8', index=False)
print("Channel Mean : ", mean) print("Channel Std : ", std) X_train = (X_train - mean) / (std) X_test = (X_test - mean) / (std) # Learning rate finder callback setup num_samples = X_train.shape[0] # When using the validation set for LRFinder, try out values starting from 2x # the lr found there and move lower until its good for the first few epochs lr_manager = OneCycleLR(num_samples, nb_epoch, batch_size, max_lr=0.025, end_percentage=0.2, scale_percentage=0.1, maximum_momentum=0.95, verbose=True) # For training, the auxilary branch must be used to correctly train NASNet model = MiniVGG((img_rows, img_cols, img_channels), weight_decay=1e-5, weights=None, classes=nb_classes) model.summary() # These values will be overridden by the above callback optimizer = SGD(lr=0.0025, momentum=0.95, nesterov=True) model.compile(loss='categorical_crossentropy', optimizer=optimizer,
X_test = X_test.astype('float32')

# preprocess input: per-channel standardization
mean = np.mean(X_train, axis=(0, 1, 2), keepdims=True).astype('float32')
std = np.std(X_train, axis=(0, 1, 2), keepdims=True).astype('float32')
print("Channel Mean : ", mean)
print("Channel Std : ", std)
X_train = (X_train - mean) / std
X_test = (X_test - mean) / std

# Learning rate finder callback setup
num_samples = X_train.shape[0]
lr_manager = OneCycleLR(max_lr=0.02, maximum_momentum=0.9, verbose=True)

# For training, the auxiliary branch must be used to correctly train NASNet
model = MiniMobileNetV2((img_rows, img_cols, img_channels), alpha=1.4,
                        weight_decay=1e-6, weights=None, classes=nb_classes)
model.summary()

# These values will be overridden by the above callback
optimizer = SGD(lr=0.002, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])