def train(optimizer, num_classes, num_epochs, scheduler, device):
    """Train an instance-segmentation model and save its final weights.

    Args:
        optimizer: ``'Adam'`` selects Adam (lr=1e-3); any other value selects
            SGD (lr=0.005, momentum=0.9, weight_decay=0.0005).
        num_classes: forwarded to ``get_model_instance_segmentation``.
        num_epochs: number of train/evaluate rounds to run.
        scheduler: truthy to multiply the learning rate by 0.1 every 3 epochs
            (``StepLR``); falsy keeps the learning rate constant.
        device: device the model is moved to; also handed to the
            ``train_one_epoch`` / ``evaluate`` helpers.
    """
    load = get_dataset()
    model = get_model_instance_segmentation(num_classes)
    model = model.to(device)

    if optimizer == 'Adam':
        exp_optimizer = optim.Adam(model.parameters(), lr=1e-3)
    else:
        exp_optimizer = optim.SGD(model.parameters(), lr=0.005,
                                  momentum=0.9, weight_decay=0.0005)

    # Bind the name unconditionally: the original only created the scheduler
    # when `scheduler` was truthy but stepped it every epoch, which raised
    # NameError for scheduler=False.
    lr_scheduler = None
    if scheduler:
        lr_scheduler = optim.lr_scheduler.StepLR(exp_optimizer,
                                                 step_size=3, gamma=0.1)

    for epoch in range(num_epochs):
        train_one_epoch(model, exp_optimizer, load['train'], device, epoch,
                        print_freq=10)
        if lr_scheduler is not None:
            lr_scheduler.step()
        evaluate(model, load['val'], device=device)

    # NOTE(review): despite the file name, this stores the weights of the
    # last epoch; no best-epoch selection happens in this function.
    torch.save(model.state_dict(), 'best_model')
    print('Finished')
def train(model_name, dropout_rate, optim_name, use_lookahead,
          batch_size, iter_size, lr_sched, initial_lr, final_lr,
          weight_decay, epochs, dataset_dir):
    """Build the input pipelines, callbacks and model, then run training.

    Each hyper-parameter is first passed through its ``get_*`` helper
    together with ``model_name`` and the returned value is what gets used.
    Checkpoints (best ``val_loss`` only) and the final model are written
    under ``config.SAVE_DIR``; TensorBoard logs go to a time-stamped
    directory under ``config.LOG_DIR``.
    """
    # Resolve the effective hyper-parameters.
    batch_size = get_batch_size(model_name, batch_size)
    iter_size = get_iter_size(model_name, iter_size)
    initial_lr = get_initial_lr(model_name, initial_lr)
    final_lr = get_final_lr(model_name, final_lr)
    optimizer = get_optimizer(model_name, optim_name, initial_lr)
    weight_decay = get_weight_decay(model_name, weight_decay)

    # Input pipelines.
    train_ds = get_dataset(dataset_dir, 'train', batch_size)
    valid_ds = get_dataset(dataset_dir, 'validation', batch_size)

    # Callbacks.
    lr_callback = get_lr_func(epochs, lr_sched, initial_lr, final_lr)
    if model_name.endswith('.h5'):
        # Derive a short name from a checkpoint path: drop the directory,
        # everything after the first '.', and everything after the first '-'.
        save_name = os.path.basename(model_name).split('.')[0].split('-')[0]
    else:
        save_name = model_name
    ckpt_pattern = os.path.join(config.SAVE_DIR, save_name) + '-ckpt-{epoch:03d}.h5'
    ckpt_callback = tf.keras.callbacks.ModelCheckpoint(
        ckpt_pattern, monitor='val_loss', save_best_only=True)
    tb_callback = tf.keras.callbacks.TensorBoard(
        log_dir='{root}/{stamp}'.format(root=config.LOG_DIR, stamp=time.time()))

    # Model.
    net = get_training_model(
        model_name=model_name,
        dropout_rate=dropout_rate,
        optimizer=optimizer,
        use_lookahead=use_lookahead,
        iter_size=iter_size,
        weight_decay=weight_decay)

    # The step counts are hard-coded sample counts (1281167 train / 50000
    # validation) divided by the batch size.
    # use_multiprocessing=True, workers=4 was tried and did not seem to help
    # in terms of speed, so it is left out.
    fit_options = dict(
        x=train_ds,
        steps_per_epoch=1281167 // batch_size,
        validation_data=valid_ds,
        validation_steps=50000 // batch_size,
        callbacks=[lr_callback, ckpt_callback, tb_callback],
        epochs=epochs)
    net.fit(**fit_options)

    # Training finished: persist the final model.
    net.save('{root}/{name}-model-final.h5'.format(root=config.SAVE_DIR,
                                                   name=save_name))
def __init__(self, imagenet_path, model_name, bs, steps, epochs, lr,
             num_data_workers, save_dir, metrics='accuracy'):
    """Set up a mirrored-strategy trainer: datasets, model, optimizer, loss.

    Args:
        imagenet_path: dataset location handed to ``get_dataset``.
        model_name: architecture name handed to ``load_model_arch``.
        bs: per-replica batch size; the global batch size stored in
            ``self.bs`` is ``bs * number_of_replicas``.
        steps: stored as ``self.steps``.
        epochs: stored as ``self.epochs``.
        lr: learning rate for the SGD optimizer.
        num_data_workers: forwarded to ``get_dataset``.
        save_dir: output directory; created if missing.
        metrics: stored as ``self.metrics``.
    """
    self.strategy = tf.distribute.MirroredStrategy()
    self.num_devices = int(self.strategy.num_replicas_in_sync)
    print('The number of devices: {}'.format(self.num_devices))

    with self.strategy.scope():
        self.model_name = model_name
        self.bs = bs * self.num_devices
        self.steps = steps
        self.epochs = epochs
        self.lr = lr

        # Bug fix: experimental_distribute_dataset splits every batch across
        # the replicas, so the dataset must be batched with the GLOBAL batch
        # size. The original passed the per-replica `bs`, leaving each
        # replica with bs / num_devices samples and `self.bs` out of sync
        # with the data. Single-device behavior is unchanged.
        self.ds_train = get_dataset(imagenet_path, 'train', self.bs,
                                    num_data_workers)
        self.ds_val = get_dataset(imagenet_path, 'validation', self.bs,
                                  num_data_workers)
        self.ds_train = self.strategy.experimental_distribute_dataset(self.ds_train)
        self.ds_val = self.strategy.experimental_distribute_dataset(self.ds_val)

        self.model = load_model_arch(self.model_name, 1000)  # 1000 is the number of classes
        self.optimizer = SGD(lr=self.lr, momentum=0.9, decay=3e-5, nesterov=False)
        self.loss = tf.keras.losses.CategoricalCrossentropy()
        self.metrics = metrics

    self.save_dir = save_dir
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(self.save_dir, exist_ok=True)
def main():
    """Load a saved ``.h5`` model and evaluate it on the validation split.

    Command line: ``[--dataset_dir DIR] [--batch_size N] model_file``.
    Exits with an error message when ``model_file`` does not end in ``.h5``.
    """
    arg_parser = argparse.ArgumentParser(description=DESCRIPTION)
    arg_parser.add_argument('--dataset_dir', type=str,
                            default=config.DEFAULT_DATASET_DIR)
    arg_parser.add_argument('--batch_size', type=int, default=16)
    arg_parser.add_argument('model_file', type=str,
                            help='a saved model (.h5) file')
    cli = arg_parser.parse_args()

    config_keras_backend()

    model_path = cli.model_file
    if not model_path.endswith('.h5'):
        sys.exit('model_file is not a .h5')

    # AdamW is registered so models saved with that optimizer deserialize.
    net = tf.keras.models.load_model(model_path,
                                     custom_objects={'AdamW': AdamW})
    validation_ds = get_dataset(cli.dataset_dir, 'validation', cli.batch_size)

    # 50000 is the hard-coded validation sample count.
    n_steps = 50000 // cli.batch_size
    results = net.evaluate(x=validation_ds, steps=n_steps)
    print('test loss, test acc:', results)
def _train_and_score(self, dataset: str) -> float:
    """
    Fit ``self.network`` on the named dataset and report its test accuracy.

    The epoch budget (10000) is only an upper bound; training is ended by
    early stopping with a patience of 5, monitored on the test split.

    :param dataset: name of the dataset passed to ``get_dataset``
    :return: second entry of ``model.evaluate`` on the test split
    """
    (num_classes, batch_size, input_shape,
     x_train, x_test, y_train, y_test) = get_dataset(dataset=dataset)
    held_out = (x_test, y_test)

    net = self._compile_model(self.network, num_classes, input_shape)

    early_stop = c.EarlyStopping(patience=5)
    net.fit(x_train, y_train,
            batch_size=batch_size,
            epochs=10000,  # early stopping decides when to stop
            verbose=0,
            validation_data=held_out,
            callbacks=[early_stop])

    evaluation = net.evaluate(x_test, y_test, verbose=0)
    return evaluation[1]
def main():
    """Compare clean vs. adversarial accuracy of several ImageNet classifiers.

    Loads a saved "inverse" model (.h5) plus pretrained VGG19, InceptionV3 and
    ResNet50, crafts adversarial examples against InceptionV3 for one
    validation batch, re-preprocesses the clean and adversarial images for each
    network, prints accuracies and predictions, and writes four figure PNGs
    under ``pandas/``.

    NOTE(review): as written, the loop skips the first 532 batches, processes
    exactly one batch and then hits a bare ``exit()``; everything after that
    ``exit()`` (result/distance log files, ``iteration += 1``, the final
    evaluation and ``clear_keras_session()``) is currently unreachable.
    """
    parser = argparse.ArgumentParser(description=DESCRIPTION)
    parser.add_argument('--dataset_dir', type=str,
                        default=config.DEFAULT_DATASET_DIR)
    parser.add_argument('--batch_size', type=int, default=10)
    # NOTE(review): this option is neither required nor given a default, so
    # omitting it leaves args.inv_model_file as None and the .endswith() call
    # below raises AttributeError.
    parser.add_argument('--inv_model_file', type=str,
                        help='a saved model (.h5) file')
    args = parser.parse_args()
    config_keras_backend()
    if not args.inv_model_file.endswith('.h5'):
        sys.exit('model_file is not a .h5')
    # Load without the saved training configuration and recompile for
    # evaluation with sparse integer labels.
    inv_model = tf.keras.models.load_model(
        args.inv_model_file,
        compile=False,
        custom_objects={'AdamW': AdamW})
    inv_model.compile(
        optimizer='sgd',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'])
    ds_validation = get_dataset(
        args.dataset_dir, 'validation', args.batch_size)

    ## VGG
    vgg_model = VGG19(include_top=True, weights='imagenet', classes=1000)
    vgg_model.compile(
        optimizer='sgd',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'])

    # InceptionV3
    inception_model = InceptionV3(include_top=True, weights='imagenet', classes=1000)
    inception_model.compile(
        optimizer='sgd',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'])

    ## ResNet
    resnet_model = ResNet50(include_top=True, weights='imagenet', classes=1000)
    resnet_model.compile(
        optimizer='sgd',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'])

    # Process batches
    iteration = 0
    # NOTE(review): sum1 and sum2 are never read in this function.
    sum1 = 0
    sum2 = 0
    for images, labels in tfds.as_numpy(ds_validation):
        # Skip the first 532 batches (other hand-picked offsets: 3822).
        if iteration < 532:  #3822:#532:
            print('continuing')
            iteration += 1
            continue
        if iteration == 50000:
            exit()

        # One-hot labels -> integer class ids (the models are compiled with
        # sparse_categorical_crossentropy).
        labels = np.argmax(labels, axis=1)

        # Craft adversarial examples against InceptionV3. Only one attack is
        # active at a time; the alternatives are kept commented out.
        # NOTE(review): dataset='cifar' is passed although the models here
        # are 1000-class ImageNet networks — confirm what run_attack does
        # with it.
        adv_imgs = run_attack(False, 'CarliniL2Method', inception_model, images, labels, batch_size=args.batch_size, dataset='cifar', fgsm_epsilon=0.3, cwl2_confidence=0)
        #adv_imgs = run_attack(False, 'DeepFool', inception_model, images, labels, batch_size=args.batch_size, dataset='cifar', fgsm_epsilon=0.3, cwl2_confidence=0)
        #adv_imgs = run_attack(True, 'FastGradientMethod', inception_model, images, labels, batch_size=args.batch_size, dataset='cifar', fgsm_epsilon=0.1, cwl2_confidence=0)
        #adv_imgs = run_attack(False, 'ProjectedGradientDescent', inception_model, images, labels, batch_size=10, dataset='cifar', fgsm_epsilon=0.1, cwl2_confidence=0)

        ## VGG ################################################
        #img *= (2.0/255)  # normalize to: 0.0~2.0
        #img -= 1.0        # subtract mean to make it: -1.0~1.0
        #img = np.expand_dims(img, axis=0)

        # Per-network input lists for the clean images ...
        vgg_imgs = []
        resnet_imgs = []
        inc_imgs = []
        flip_imgs = []
        inv_imgs = []
        # ... and for the adversarial images.
        adv_vgg_imgs = []
        adv_resnet_imgs = []
        adv_inc_imgs = []
        adv_flip_imgs = []
        adv_inv_imgs = []
        for ii in range(images.shape[0]):
            # (x + 1) * 127.5: presumably maps dataset images from [-1, 1]
            # back to [0, 255] before each network's own preprocessing —
            # TODO confirm the range produced by get_dataset.
            img = copy.deepcopy(images[ii, :, :, :])
            img += 1.0
            #img /= (2.0/255)
            img *= (255.0 / 2.0)

            ## VGG
            vgg_img = copy.deepcopy(img)
            vgg_img = cv2.resize(vgg_img, (224, 224))
            vgg_img = vgg_preprocess_input(vgg_img)
            vgg_imgs.append(vgg_img)

            ## Resnet
            resnet_img = copy.deepcopy(img)
            resnet_img = cv2.resize(resnet_img, (224, 224))
            resnet_img = resnet_preprocess_input(resnet_img)
            resnet_imgs.append(resnet_img)

            ## InceptionV3
            inc_img = copy.deepcopy(img)
            inc_img = cv2.resize(inc_img, (299, 299))
            inc_img = inception_preprocess_input(inc_img)
            inc_imgs.append(inc_img)

            ## Flipped
            # The active variant flips the raw dataset image horizontally
            # without rescaling, resizing or re-preprocessing it.
            #flip_img = copy.deepcopy(img)
            #flip_img = cv2.resize(flip_img, (299, 299))
            #flip_img = cv2.flip(flip_img, 1)
            #flip_img = inception_preprocess_input(flip_img)
            #flip_imgs.append(flip_img)
            flip_img = copy.deepcopy(images[ii, :, :, :])
            flip_img = cv2.flip(flip_img, 1)
            flip_imgs.append(flip_img)

            ## Inverse
            # Rescale by (x + 1) / 2, invert (1 - x), scale by 255, then
            # apply the Inception preprocessing.
            inv_img = copy.deepcopy(images[ii, :, :, :])  #########
            inv_img += 1.0
            inv_img /= 2.0
            inv_img = 1 - inv_img
            inv_img *= 255.0
            inv_img = cv2.resize(inv_img, (299, 299))
            inv_img = inception_preprocess_input(inv_img)
            inv_imgs.append(inv_img)

            #==========================================
            # ADVERSARIAL ---------------
            # Same pipeline as above, applied to the adversarial image.
            adv_img = copy.deepcopy(adv_imgs[ii, :, :, :])
            adv_img += 1.0
            #adv_img /= (2.0/255)
            adv_img *= (255.0 / 2.0)

            # VGG
            adv_vgg_img = copy.deepcopy(adv_img)
            adv_vgg_img = cv2.resize(adv_vgg_img, (224, 224))
            adv_vgg_img = vgg_preprocess_input(adv_vgg_img)
            adv_vgg_imgs.append(adv_vgg_img)

            # Resnet
            adv_resnet_img = copy.deepcopy(adv_img)
            adv_resnet_img = cv2.resize(adv_resnet_img, (224, 224))
            adv_resnet_img = resnet_preprocess_input(adv_resnet_img)
            adv_resnet_imgs.append(adv_resnet_img)

            # InceptionV3
            adv_inc_img = copy.deepcopy(adv_img)
            adv_inc_img = cv2.resize(adv_inc_img, (299, 299))
            adv_inc_img = inception_preprocess_input(adv_inc_img)
            adv_inc_imgs.append(adv_inc_img)

            ## Flipped
            #adv_flip_img = copy.deepcopy(img)
            #adv_flip_img = cv2.resize(adv_flip_img, (299, 299))
            #adv_flip_img = cv2.flip(adv_flip_img, 1)
            #adv_flip_img = inception_preprocess_input(adv_flip_img)
            #adv_flip_imgs.append(adv_flip_img)
            adv_flip_img = copy.deepcopy(adv_imgs[ii, :, :, :])
            adv_flip_img = cv2.flip(adv_flip_img, 1)
            adv_flip_imgs.append(adv_flip_img)

            ## Inverse
            ## test on inverse Inceptionv3
            adv_inv_img = copy.deepcopy(adv_imgs[ii, :, :, :])  #########
            adv_inv_img += 1.0
            adv_inv_img /= 2.0
            adv_inv_img = 1 - adv_inv_img
            adv_inv_img *= 255.0
            adv_inv_img = cv2.resize(adv_inv_img, (299, 299))
            adv_inv_img = inception_preprocess_input(adv_inv_img)
            adv_inv_imgs.append(adv_inv_img)

        # Stack the per-image lists into batch arrays.
        # Horizontal Flipping
        # test on Resnet
        vgg_imgs = np.asarray(vgg_imgs)
        resnet_imgs = np.asarray(resnet_imgs)
        inc_imgs = np.asarray(inc_imgs)
        flip_imgs = np.asarray(flip_imgs)
        inv_imgs = np.asarray(inv_imgs)

        adv_vgg_imgs = np.asarray(adv_vgg_imgs)
        adv_resnet_imgs = np.asarray(adv_resnet_imgs)
        adv_inc_imgs = np.asarray(adv_inc_imgs)
        adv_flip_imgs = np.asarray(adv_flip_imgs)
        adv_inv_imgs = np.asarray(adv_inv_imgs)

        # Accuracy on the clean batch (results1..5), then on the adversarial
        # batch (results6..10); evaluate() returns (loss, accuracy) and the
        # loss is discarded. Order: ResNet, VGG, Inception, Inception on
        # flipped input, inverse model.
        # Default ResNet accuracy
        _, results1 = resnet_model.evaluate(x=resnet_imgs, y=labels, verbose=0)
        _, results2 = vgg_model.evaluate(x=vgg_imgs, y=labels, verbose=0)
        _, results3 = inception_model.evaluate(x=inc_imgs, y=labels, verbose=0)
        _, results4 = inception_model.evaluate(x=flip_imgs, y=labels, verbose=0)
        _, results5 = inv_model.evaluate(x=inv_imgs, y=labels, verbose=0)
        # print('-----------------------------------------------------')
        _, results6 = resnet_model.evaluate(x=adv_resnet_imgs, y=labels, verbose=0)
        _, results7 = vgg_model.evaluate(x=adv_vgg_imgs, y=labels, verbose=0)
        _, results8 = inception_model.evaluate(x=adv_inc_imgs, y=labels, verbose=0)
        _, results9 = inception_model.evaluate(x=adv_flip_imgs, y=labels, verbose=0)
        _, results10 = inv_model.evaluate(x=adv_inv_imgs, y=labels, verbose=0)

        # One line per model: clean accuracy, adversarial accuracy.
        print(iteration)
        print(results1, results6)
        print(results2, results7)
        print(results3, results8)
        print(results4, results9)
        print(results5, results10)

        # Print the figure images
        # NOTE(review): INDEX = 1 requires at least two images in the batch.
        INDEX = 1

        # x / 2 + 0.5, then * 255: presumably undoes the Inception
        # preprocessing ([-1, 1] -> [0, 255]) — TODO confirm.
        # Original image
        orig = copy.deepcopy(inc_imgs[INDEX])
        orig /= 2.0
        orig += 0.5
        orig *= 255.0

        # Adversarial image
        adv = copy.deepcopy(adv_inc_imgs[INDEX])
        adv /= 2.0
        adv += 0.5
        adv *= 255.0

        # Perturbation image
        diff = adv - orig
        print(np.amax(diff))
        #exit()

        # Flip image
        flip = copy.deepcopy(flip_imgs[INDEX])
        flip /= 2.0
        flip += 0.5
        flip *= 255.0

        # Save images
        # NOTE(review): nothing in this function creates the 'pandas/'
        # directory; it must already exist.
        imageio.imwrite('pandas/panda_orig.png', np.reshape(orig, (299, 299, 3)))
        imageio.imwrite('pandas/panda_adv.png', np.reshape(adv, (299, 299, 3)))
        imageio.imwrite('pandas/panda_diff.png', np.reshape(diff, (299, 299, 3)))
        imageio.imwrite('pandas/panda_flip.png', np.reshape(flip, (299, 299, 3)))

        # For each model/input pair print the predicted class ids (argmax)
        # and the corresponding top scores (amax).
        print(labels)
        print('Inception---original-------------------------')
        #preds = inception_model.predict(np.reshape(inc_imgs[INDEX],(1,299,299,3)))
        #print('confidence:', inc_decode_predictions(preds, top=1)[0])
        preds = inception_model.predict(inc_imgs)
        print('IncV3 Predicted:', np.argmax(preds, axis=1))
        print('IncV3 Predicted:', np.amax(preds, axis=1))
        print()

        print('VGG---original-------------------------')
        #preds = vgg_model.predict(np.reshape(vgg_imgs[INDEX],(1,224,224,3)))
        #print('confidence:', vgg_decode_predictions(preds, top=1)[0])
        preds = vgg_model.predict(vgg_imgs)
        print('VGG Predicted:', np.argmax(preds, axis=1))
        print('VGG Predicted:', np.amax(preds, axis=1))
        print()

        print('ResNet---original-------------------------')
        #preds = resnet_model.predict(np.reshape(resnet_imgs[INDEX],(1,224,224,3)))
        #print('confidence:', resnet_decode_predictions(preds, top=1)[0])
        preds = resnet_model.predict(resnet_imgs)
        print('ResNet Predicted:', np.argmax(preds, axis=1))
        print('ResNet Predicted:', np.amax(preds, axis=1))
        print()

        print('Inception---adv-------------------------')
        #preds = inception_model.predict(np.reshape(adv_inc_imgs[INDEX],(1,299,299,3)))
        #print('confidence:', inc_decode_predictions(preds, top=1)[0])
        preds = inception_model.predict(adv_inc_imgs)
        print('Adv IncV3 Predicted:', np.argmax(preds, axis=1))
        print('Adv IncV3 Predicted:', np.amax(preds, axis=1))
        print()

        print('VGG---adv-------------------------')
        #preds = vgg_model.predict(np.reshape(adv_vgg_imgs[INDEX],(1,224,224,3)))
        #print('confidence:', vgg_decode_predictions(preds, top=1)[0])
        preds = vgg_model.predict(adv_vgg_imgs)
        print('Adv VGG Predicted:', np.argmax(preds, axis=1))
        print('Adv VGG Predicted:', np.amax(preds, axis=1))
        print()

        print('ResNet---adv-------------------------')
        #preds = resnet_model.predict(np.reshape(adv_resnet_imgs[INDEX],(1,224,224,3)))
        #print('confidence:', resnet_decode_predictions(preds, top=1)[0])
        preds = resnet_model.predict(adv_resnet_imgs)
        print('Adv ResNet Predicted:', np.argmax(preds, axis=1))
        print('Adv ResNet Predicted:', np.amax(preds, axis=1))
        print()

        # This section predicts on the flipped ADVERSARIAL images.
        print('Inception---flip-------------------------')
        #preds = inception_model.predict(np.reshape(adv_flip_imgs[INDEX],(1,299,299,3)))
        #print('confidence:', inc_decode_predictions(preds, top=1)[0])
        preds = inception_model.predict(adv_flip_imgs)
        print('flip Predicted:', np.argmax(preds, axis=1))
        print('flip Predicted:', np.amax(preds, axis=1))
        print()

        #print('Accuracies--------------------------')
        #print('flip accuracy:', inc_decode_predictions(preds, top=3)[0])

        # NOTE(review): the script stops here after a single processed batch;
        # the remainder of the loop body only runs if this line is removed.
        exit()

        # Append the ten accuracies as one space-separated line.
        # NOTE(review): the file names say "pgd" / "batch-20" while the
        # active attack above is CarliniL2Method with the CLI batch size.
        with open("output_pgd_untarg_batch-20_norm-2.txt", "a") as myfile:
            myfile.write(
                str(results1) + ' ' + str(results2) + ' ' +
                str(results3) + ' ' + str(results4) + ' ' +
                str(results5) + ' ' + str(results6) + ' ' +
                str(results7) + ' ' + str(results8) + ' ' +
                str(results9) + ' ' + str(results10) + '\n')

        # Distances
        # Per-image L1 / L2 / L-inf norms of (adversarial - clean), measured
        # on the Inception-preprocessed arrays.
        norm_diffs_1 = [
            np.linalg.norm(
                np.subtract(adv_inc_imgs[ii].flatten(),
                            inc_imgs[ii].flatten()), 1)
            for ii in range(inc_imgs.shape[0])
        ]
        norm_diffs_2 = [
            np.linalg.norm(
                np.subtract(adv_inc_imgs[ii].flatten(),
                            inc_imgs[ii].flatten()), 2)
            for ii in range(inc_imgs.shape[0])
        ]
        norm_diffs_inf = [
            np.linalg.norm(
                np.subtract(adv_inc_imgs[ii].flatten(),
                            inc_imgs[ii].flatten()), np.inf)
            for ii in range(inc_imgs.shape[0])
        ]
        print(np.mean(norm_diffs_1), np.mean(norm_diffs_2),
              np.mean(norm_diffs_inf))
        with open("distances_pgd_untarg_batch-20_norm-2.txt", "a") as myfile:
            myfile.write(
                str(np.mean(norm_diffs_1)) + ' ' +
                str(np.mean(norm_diffs_2)) + ' ' +
                str(np.mean(norm_diffs_inf)) + '\n')

        iteration += 1
        #exit()

        #results = resnet_model.evaluate(x=adv_imgs, y=to_categorical(labels, 1000))
        #print('RESNET test loss, test acc:', results)
        #results = vgg_model.evaluate(x=adv_imgs, y=to_categorical(labels, 1000))
        #print('VGG test loss, test acc:', results)

        # labels = np.argmax(labels, axis=1)
        #
        # #results = model.evaluate(
        # #    x=images, y=to_categorical(labels, 1000))
        # #print('test loss, test acc:', results)
        # total = total + images.shape[0]
        # print(total)

    # NOTE(review): this exit() makes the full-dataset evaluation and the
    # session cleanup below unreachable.
    exit()
    results = resnet_model.evaluate(
        x=ds_validation, steps=50000 // args.batch_size)
    print('test loss, test acc:', results)
    clear_keras_session()
def main():
    """Train or evaluate a DenseNet (121/161/169) classifier.

    Behavior is driven entirely by the command line parsed with
    ``DenseNetArgumentParser``:

    * ``--evaluate_checkpoint_path``: load the weights, evaluate on the
      validation subset, print the result and exit.
    * ``--deterministic``: train on pickled ``input``/``target`` tensors from
      ``--inputs`` inside ``dump_callback``; requires ``--inputs`` and
      ``--resume_from_checkpoint_path``.
    * otherwise: train on the dataset under ``--dataset_dir`` with
      validation after every epoch.

    Raises:
        ValueError: deterministic mode without inputs or checkpoint.
        FileNotFoundError: deterministic mode with a missing dump config.
    """
    parser = DenseNetArgumentParser(
        description=(
            "train.py is the main training/evaluation script for DenseNet. "
            "In order to run training on multiple Gaudi cards, use demo_densenet.py or run "
            "train.py with mpirun."))
    args, _ = parser.parse_known_args()

    strategy = None
    verbose = 1

    os.environ['ENABLE_EXPERIMENTAL_FLAGS'] = 'true'
    # NOTE(review): this value looks masked/redacted in the source; confirm
    # the intended setting.
    os.environ['RUN_TPC_FUSER'] = '******'

    if args.deterministic:
        if args.inputs is None:
            raise ValueError("Must provide inputs for deterministic mode")
        if args.resume_from_checkpoint_path is None:
            raise ValueError("Must provide checkpoint for deterministic mode")

    if args.dtype == 'bf16':
        os.environ['TF_BF16_CONVERSION'] = '1'

    if args.run_on_hpu:
        load_habana_module()
        if args.use_hpu_strategy:
            hls_addresses = str(os.environ.get(
                "MULTI_HLS_IPS", "127.0.0.1")).split(",")
            TF_BASE_PORT = 2410
            mpi_rank = comm_rank()
            mpi_size = comm_size()
            if mpi_rank > 0:
                # Only rank 0 prints progress.
                verbose = 0
            # worker_hosts: comma-separated list of worker ip:port pairs.
            # Bug fix: the original concatenated one comma-joined group per
            # address WITHOUT a separator between groups, so with more than
            # one host the last worker of a host was fused with the first
            # worker of the next ("a:2411b:2410"). Joining one flat sequence
            # yields the same string for a single host and a valid list for
            # several.
            ranks_per_host = mpi_size // len(hls_addresses)
            worker_hosts = ",".join(
                address + ':' + str(TF_BASE_PORT + rank)
                for address in hls_addresses
                for rank in range(ranks_per_host))
            task_index = mpi_rank
            # Configures cluster spec for distribution strategy.
            _ = distribution_utils.configure_cluster(worker_hosts, task_index)
            strategy = HPUStrategy()
            print('Number of devices: {}'.format(
                strategy.num_replicas_in_sync))
    else:
        strategy = tf.distribute.MultiWorkerMirroredStrategy()
        print('Number of devices: {}'.format(strategy.num_replicas_in_sync))

    if args.seed is not None:
        os.environ['TF_DETERMINISTIC_OPS'] = '1'
        random.seed(args.seed)
        np.random.seed(args.seed)
        tf.random.set_seed(args.seed)

    img_rows, img_cols = 224, 224  # Resolution of inputs
    channel = 3
    num_classes = 1000
    batch_size = args.batch_size
    nb_epoch = args.epochs
    dataset_dir = args.dataset_dir
    resume_from_checkpoint_path = args.resume_from_checkpoint_path
    resume_from_epoch = args.resume_from_epoch
    dropout_rate = args.dropout_rate
    weight_decay = args.weight_decay
    optim_name = args.optimizer
    initial_lr = args.initial_lr
    model_name = args.model
    save_summary_steps = args.save_summary_steps

    # (growth_rate, nb_filter, nb_layers) per supported architecture.
    architectures = {
        "densenet121": (32, 64, [6, 12, 24, 16]),
        "densenet161": (48, 96, [6, 12, 36, 24]),
        "densenet169": (32, 64, [6, 12, 32, 32]),
    }
    if model_name not in architectures:
        print("model is not supported")
        exit(1)
    growth_rate, nb_filter, nb_layers = architectures[model_name]

    def build_compiled_model():
        """Create the DenseNet and compile it with the selected optimizer."""
        net = densenet_model(img_rows=img_rows, img_cols=img_cols,
                             color_type=channel,
                             dropout_rate=dropout_rate,
                             weight_decay=weight_decay,
                             num_classes=num_classes,
                             growth_rate=growth_rate, nb_filter=nb_filter,
                             nb_layers=nb_layers)
        optimizer = get_optimizer(
            model_name, optim_name, initial_lr, epsilon=1e-2)
        net.compile(optimizer=optimizer,
                    loss='categorical_crossentropy', metrics=['accuracy'])
        return net

    # Load our model (inside the strategy scope when one is in use).
    if strategy:
        with strategy.scope():
            model = build_compiled_model()
    else:
        model = build_compiled_model()

    # Start training
    # Defaults are the hard-coded sample counts divided by the batch size;
    # explicit CLI values win.
    steps_per_epoch = 1281167 // batch_size
    if args.steps_per_epoch is not None:
        steps_per_epoch = args.steps_per_epoch
    validation_steps = 50000 // batch_size
    if args.validation_steps is not None:
        validation_steps = args.validation_steps
    warmup_steps = args.warmup_epochs * steps_per_epoch

    # epoch -> learning-rate multiplier, converted to global-step keys.
    lr_sched = {0: 1, 30: 0.1, 60: 0.01, 80: 0.001}
    lr_sched_steps = {
        epoch * steps_per_epoch: multiplier
        for (epoch, multiplier) in lr_sched.items()}

    lrate = StepLearningRateScheduleWithWarmup(initial_lr=initial_lr,
                                               initial_global_step=0,
                                               warmup_steps=warmup_steps,
                                               decay_schedule=lr_sched_steps,
                                               verbose=0)

    save_name = model_name if not model_name.endswith('.h5') else \
        os.path.split(model_name)[-1].split('.')[0].split('-')[0]

    model_ckpt = tf.keras.callbacks.ModelCheckpoint(
        os.path.join(args.model_dir, config.SAVE_DIR, save_name) + '-ckpt-{epoch:03d}.h5',
        monitor='train_loss')

    callbacks = [lrate, model_ckpt]

    if save_summary_steps is not None and save_summary_steps > 0:
        log_dir = os.path.join(args.model_dir, config.LOG_DIR)
        local_batch_size = batch_size
        if args.use_hpu_strategy:
            # One log directory per worker; throughput is reported for the
            # per-replica share of the batch.
            log_dir = os.path.join(log_dir, 'worker_' + str(comm_rank()))
            local_batch_size = batch_size // strategy.num_replicas_in_sync
        callbacks += [
            TensorBoardWithHParamsV2(
                args.__dict__, log_dir=log_dir,
                update_freq=save_summary_steps, profile_batch=0),
            ExamplesPerSecondKerasHookV2(
                save_summary_steps, output_dir=log_dir,
                batch_size=local_batch_size),
        ]

    if (args.evaluate_checkpoint_path is not None):
        model.load_weights(args.evaluate_checkpoint_path)
        # Bug fix: the original evaluated `ds_valid` here although the
        # dataset was only created later in the training branch, so the
        # evaluate-only path always failed with an unbound local.
        ds_valid = get_dataset(dataset_dir, args.val_subset, batch_size)
        results = model.evaluate(x=ds_valid, steps=validation_steps)
        print("Test loss, Test acc:", results)
        exit()

    if ((resume_from_epoch is not None) and
            (resume_from_checkpoint_path is not None)):
        model.load_weights(resume_from_checkpoint_path)

    if args.deterministic:
        set_deterministic()
        if not os.path.isfile(args.dump_config):
            raise FileNotFoundError("wrong dump config path")

        import pickle
        x_path = os.path.join(args.inputs, "input")
        y_path = os.path.join(args.inputs, "target")
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only point --inputs at trusted data.
        # Files are now closed deterministically instead of being leaked.
        with open(x_path, 'rb') as x_file:
            x = pickle.load(x_file)
        with open(y_path, 'rb') as y_file:
            y = pickle.load(y_file)

        with dump_callback(args.dump_config):
            model.fit(x=x, y=y,
                      steps_per_epoch=steps_per_epoch,
                      callbacks=callbacks,
                      initial_epoch=resume_from_epoch,
                      epochs=nb_epoch,
                      shuffle=False,
                      verbose=verbose,
                      validation_data=None,
                      validation_steps=0,
                      )
    else:
        ds_train = get_dataset(dataset_dir, args.train_subset, batch_size)
        ds_valid = get_dataset(dataset_dir, args.val_subset, batch_size)

        model.fit(x=ds_train, y=None,
                  steps_per_epoch=steps_per_epoch,
                  callbacks=callbacks,
                  initial_epoch=resume_from_epoch,
                  epochs=nb_epoch,
                  shuffle=True,
                  verbose=verbose,
                  validation_data=(ds_valid, None),
                  validation_steps=validation_steps,
                  validation_freq=1,
                  )
import tensorflow as tf

from utils.dataset import get_dataset

# Location of the TFRecord files. expanduser('~') resolves to $HOME when it
# is set and still works when the variable is missing.
DATASET_DIR = os.path.join(os.path.expanduser('~'), 'data/ILSVRC2012/tfrecords')

parser = argparse.ArgumentParser()
parser.add_argument('subset', type=str, choices=['train', 'validation'])
args = parser.parse_args()

log_dir = os.path.join('logs', args.subset)
shutil.rmtree(log_dir, ignore_errors=True)  # clear prior log data

# Build the (TF1 graph-mode) pipeline: one batch of 64 images and labels.
dataset = get_dataset(DATASET_DIR, args.subset, batch_size=64)
iterator = dataset.make_initializable_iterator()
batch_xs, batch_ys = iterator.get_next()
# Mean over the first three axes, leaving one value per entry of the last
# axis (presumably the RGB channels of an NHWC batch — TODO confirm).
mean_rgb = tf.reduce_mean(batch_xs, axis=[0, 1, 2])

# convert normalized image back: [-1, 1] -> [0, 1]
batch_imgs = tf.multiply(batch_xs, 0.5)
batch_imgs = tf.add(batch_imgs, 0.5)

summary_op = tf.summary.image('image_batch', batch_imgs, max_outputs=64)

with tf.Session() as sess:
    writer = tf.summary.FileWriter(log_dir, sess.graph)
    try:
        sess.run(iterator.initializer)
        # Fix: the original built `summary_op` and opened a FileWriter but
        # never evaluated or wrote the summary, so the image batch never
        # reached the log directory. Fetching both tensors in one run()
        # makes the printed mean and the logged images describe the same
        # batch.
        rgb, summary = sess.run([mean_rgb, summary_op])
        print('Mean RGB (-1.0~1.0):', rgb)
        writer.add_summary(summary)
    finally:
        # Fix: close (and thereby flush) the writer; it was leaked before.
        writer.close()
def process(options, trainCollection, valCollection, testCollection):
    """Train a W2VV_MS text-to-visual model, then generate and run its test script.

    Steps: read run options and the ``w2vv_configs/<model_config>.py``
    config, build the three text encoders (RNN / bag-of-words / word2vec),
    train with per-epoch validation (keep only the best weights, halve the
    learning rate after non-improving epochs, stop after more than 10 of them
    in a row), write the loss/performance histories, and finally fill in
    ``TEMPLATE_do_test.sh`` and execute the resulting script.

    Args:
        options: parsed command-line options (rootpath, overwrite,
            checkpoint, init_model_from, unroll, corpus, word2vec,
            batch_size, model_config, val_metric are read here).
        trainCollection: name of the training collection.
        valCollection: name of the validation collection.
        testCollection: name of the test collection.

    The process exits with status 0 when ``text_style`` contains neither
    'lstm' nor 'gru', or when ``checkToSkip`` reports the model file should
    not be regenerated.
    """
    lang = which_language(trainCollection)
    assert(which_language(trainCollection) == which_language(valCollection))
    assert(which_language(trainCollection) == which_language(testCollection))

    rootpath = options.rootpath
    overwrite = options.overwrite
    checkpoint = options.checkpoint
    init_model_from = options.init_model_from
    unroll = options.unroll
    corpus = options.corpus
    word2vec = options.word2vec
    batch_size = options.batch_size

    w2vv_config = options.model_config
    config = load_config('w2vv_configs/%s.py' % w2vv_config)

    img_feature = config.img_feature
    set_style = config.set_style
    # text embedding style (word2vec, bag-of-words, word hashing)
    text_style = config.text_style
    L1_normalize = config.L1_normalize
    L2_normalize = config.L2_normalize
    bow_vocab = config.bow_vocab+'.txt'

    l2_p = config.l2_p
    dropout = config.dropout

    max_epochs = config.max_epochs
    optimizer = config.optimizer
    loss_fun = config.loss_fun
    lr = config.lr
    clipnorm = config.clipnorm
    activation = config.activation
    sequences = config.sequences

    # lstm
    sent_maxlen = config.sent_maxlen
    embed_size = config.embed_size
    we_trainable = config.we_trainable
    lstm_size = config.lstm_size

    # Fix: build a real list. The original used map(), which is only
    # indexable/assignable on Python 2; n_layers[0] is read and written below.
    n_layers = [int(width) for width in config.n_layers.strip().split('-')]

    if init_model_from != '':
        init_model_name = init_model_from.strip().split("/")[-1]
        train_style = INFO + "_" + init_model_name
    else:
        train_style = INFO

    rnn_style, bow_style, w2v_style = text_style.strip().split('@')

    # text embedding style
    model_info = w2vv_config

    if 'lstm' in text_style or 'gru' in text_style:
        if lang == 'zh':
            w2v_data_path = os.path.join(rootpath, 'zh_w2v', 'model', 'zh_jieba.model')
        else:
            w2v_data_path = os.path.join(rootpath, "word2vec", corpus, word2vec)

        # bag-of-words vocabulary file path
        text_data_path = os.path.join(rootpath, trainCollection, "TextData", "vocabulary", "bow", bow_vocab)
        bow_data_path = os.path.join(rootpath, trainCollection, "TextData", "vocabulary", bow_style, bow_vocab)

        # text embedding (text representation)
        text2vec = get_text_encoder(rnn_style)(text_data_path, ndims=0, language=lang, L1_normalize=L1_normalize, L2_normalize=L2_normalize, maxlen=sent_maxlen)
        bow2vec = get_text_encoder(bow_style)(bow_data_path, ndims=0, language=lang, L1_normalize=L1_normalize, L2_normalize=L2_normalize)
        w2v2vec = get_text_encoder(w2v_style)(w2v_data_path, ndims=0, language=lang, L1_normalize=L1_normalize, L2_normalize=L2_normalize)
        # A leading 0 in the config means "infer the input width from the
        # encoders"; any other value must match it.
        if n_layers[0] == 0:
            n_layers[0] = bow2vec.ndims + w2v2vec.ndims
        else:
            assert n_layers[0] == bow2vec.ndims + w2v2vec.ndims

        # log file
        checkpoint_dir = os.path.join(rootpath, trainCollection, checkpoint, valCollection, train_style, model_info)
    else:
        logger.info("%s is not supported, please check the 'text_style' parameter", text_style)
        sys.exit(0)

    train_loss_hist_file = os.path.join(checkpoint_dir, 'train_loss_hist.txt')
    val_per_hist_file = os.path.join(checkpoint_dir, 'val_per_hist.txt')
    model_file_name = os.path.join(checkpoint_dir, 'model.json')
    model_img_name = os.path.join(checkpoint_dir, 'model.png')

    logger.info(model_file_name)
    if checkToSkip(model_file_name, overwrite):
        sys.exit(0)

    makedirsforfile(val_per_hist_file)

    # img2vec
    img_feat_path = os.path.join(rootpath, FULL_COLLECTION, 'FeatureData', img_feature)
    img_feats = BigFile(img_feat_path)

    val_img_feat_path = os.path.join(rootpath, FULL_COLLECTION, 'FeatureData', img_feature)
    val_img_feats = BigFile(val_img_feat_path)

    # dataset
    train_file = os.path.join(rootpath, trainCollection, 'TextData', '%s.caption.txt' % trainCollection)

    # training set
    dataset_style = 'sent_' + loss_fun
    DataSet = get_dataset(dataset_style)

    # represent text on the fly
    trainData = DataSet(train_file, batch_size, text2vec, bow2vec, w2v2vec, img_feats, flag_maxlen=True, maxlen=sent_maxlen)

    # get pre-trained word embedding
    we_weights = get_we_parameter(text2vec.vocab, w2v_data_path, lang)
    # define word2visualvec model
    w2vv = W2VV_MS(text2vec.nvocab, sent_maxlen, embed_size, we_weights, we_trainable, lstm_size, n_layers, dropout, l2_p, activation=activation, lstm_style=rnn_style, sequences=sequences, unroll=unroll)

    w2vv.save_json_model(model_file_name)
    w2vv.plot(model_img_name)
    w2vv.compile_model(optimizer, loss_fun, learning_rate=lr, clipnorm=clipnorm)

    if options.init_model_from != '':
        logger.info('initialize the model from %s', options.init_model_from)
        w2vv.init_model(options.init_model_from)

    # preparation for validation
    val_sent_file = os.path.join(rootpath, valCollection, 'TextData', '%s.caption.txt' % valCollection)
    val_sents_id, val_sents, val_id2sents = readSentsInfo(val_sent_file)
    # Fix: materialize a list and close the file. The original passed a
    # map() over an unclosed file; on Python 3 that iterator is exhausted
    # after the first validation round.
    val_img_list_file = os.path.join(rootpath, valCollection, set_style, '%s.txt' % valCollection)
    with open(val_img_list_file) as fin:
        val_img_list = [line.strip() for line in fin]

    # Encode the validation sentences once; sentences any encoder cannot
    # map are dropped.
    sent_feats_1 = []
    sent_feats_2 = []
    new_val_sents_id = []
    for index, sent in enumerate(val_sents):
        sent_vec = text2vec.mapping(sent)
        bow_vec = bow2vec.mapping(sent)
        w2v_vec = w2v2vec.mapping(sent)
        if sent_vec is not None and bow_vec is not None and w2v_vec is not None:
            sent_feats_1.append(sent_vec)
            sent_feats_2.append(list(bow_vec) + list(w2v_vec))
            new_val_sents_id.append(val_sents_id[index])
    sent_feats_1 = pad_sequences(sent_feats_1, maxlen=sent_maxlen, truncating='post')

    simer = get_simer('cosine_batch')()
    scorer = getScorer(options.val_metric)

    count = 0       # consecutive epochs without improvement
    lr_count = 0    # epochs since the last learning-rate decay
    best_validation_perf = 0
    best_epoch = -1
    train_loss_hist = []
    val_per_hist = []
    n_train_batches = int(np.ceil(1.0 * trainData.datasize / batch_size))
    if loss_fun == 'ctl':
        datasize = 2*trainData.datasize
    else:
        datasize = trainData.datasize

    for epoch in range(max_epochs):
        logger.info('Epoch %d', epoch)
        logger.info("Training..., learning rate: %g", w2vv.get_lr())

        train_loss_epoch = []
        train_progbar = generic_utils.Progbar(datasize)
        trainBatchIter = trainData.getBatchData()
        # Fix: range()/next() instead of the Python 2-only xrange()/.next().
        for minibatch_index in range(n_train_batches):
            train_X_batch, train_Y_batch = next(trainBatchIter)
            loss = w2vv.model.train_on_batch(train_X_batch, train_Y_batch)
            train_progbar.add(train_X_batch[0].shape[0], values=[("train loss", loss)])
            train_loss_epoch.append(loss)

        train_loss_hist.append(np.mean(train_loss_epoch))

        this_validation_perf = do_validation(val_img_list, val_img_feats, new_val_sents_id, sent_feats_1, sent_feats_2, simer, scorer, w2vv)
        val_per_hist.append(this_validation_perf)

        logger.info('previous_best_performance: %g', best_validation_perf)
        logger.info('current_performance: %g', this_validation_perf)

        fout_file = os.path.join(checkpoint_dir, 'epoch_%d.h5' % (epoch))

        lr_count += 1
        if this_validation_perf > best_validation_perf:
            best_validation_perf = this_validation_perf
            count = 0

            # save best model
            w2vv.model.save_weights(fout_file)
            if best_epoch != -1:
                # Fix: delete the superseded weights with os.remove instead
                # of shelling out to `rm` (non-portable, and broken by
                # whitespace in the path). Stays best-effort like the
                # original: a failed delete is logged, not raised.
                stale_weights = os.path.join(checkpoint_dir, 'epoch_%d.h5' % (best_epoch))
                try:
                    os.remove(stale_weights)
                except OSError as err:
                    logger.warning('could not remove %s: %s', stale_weights, err)
            best_epoch = epoch
        else:
            # when the validation performance has decreased after an epoch,
            # we divide the learning rate by 2 and continue training;
            # but we use each learning rate for at least 3 epochs.
            if lr_count > 2:
                w2vv.decay_lr(0.5)
                lr_count = 0
            count += 1
            if count > 10:
                print ("Early stopping happend")
                break

    # Training loss per epoch, in epoch order.
    with open(train_loss_hist_file, 'w') as fout:
        for i, loss in enumerate(train_loss_hist):
            fout.write("epoch_" + str(i) + " " + str(loss) + "\n")

    # Validation performance per epoch, best first.
    sorted_epoch_perf = sorted(enumerate(val_per_hist), key=lambda x: x[1], reverse=True)
    with open(val_per_hist_file, 'w') as fout:
        for i, perf in sorted_epoch_perf:
            fout.write("epoch_" + str(i) + " " + str(perf) + "\n")

    # generate the shell script for test
    with open('TEMPLATE_do_test.sh') as fin:
        templete = fin.read()
    striptStr = templete.replace('@@@rootpath@@@', rootpath)
    striptStr = striptStr.replace('@@@overwrite@@@', str(overwrite))
    striptStr = striptStr.replace('@@@trainCollection@@@', trainCollection)
    striptStr = striptStr.replace('@@@testCollection@@@', '%s %s' % (valCollection, testCollection))
    striptStr = striptStr.replace('@@@model_config@@@', w2vv_config)
    striptStr = striptStr.replace('@@@set_style@@@', set_style)
    striptStr = striptStr.replace('@@@model_path@@@', checkpoint_dir)
    striptStr = striptStr.replace('@@@model_name@@@', 'model.json')
    # NOTE(review): this raises IndexError when no epoch was run
    # (max_epochs == 0); the referenced weights file exists only if some
    # epoch scored above 0.
    striptStr = striptStr.replace('@@@weight_name@@@', 'epoch_%d.h5' % sorted_epoch_perf[0][0])

    runfile = 'do_test_%s_%s.sh' % (w2vv_config, testCollection)
    # Fix: close (flush) the script before it is made executable and run;
    # the original relied on garbage collection to close it.
    with open(runfile, 'w') as fout:
        fout.write(striptStr+'\n')
    os.system('chmod +x %s' % runfile)
    os.system('./%s' % runfile)
def train(model_name, dropout_rate, optim_name, epsilon, label_smoothing,
          use_lookahead, batch_size, iter_size, lr_sched, initial_lr,
          final_lr, weight_decay, epochs, dataset_dir,
          num_train_samples=642289, num_val_samples=25000):
    """Prepare data, train the model and report the final accuracy to NNI.

    The per-model helpers (``get_batch_size``, ``get_iter_size``, ...) are
    given the caller-supplied hyper-parameters and their results are used
    from then on.  The model is built under a ``MirroredStrategy`` that uses
    NCCL all-reduce with two packs.

    Args:
        num_train_samples: number of training examples used to derive
            ``steps_per_epoch``.  The default (642289) is the value the
            original code labelled "500 modification"; its commented-out
            alternatives were 383690 ("300") and 1281167 ("1000 classes").
        num_val_samples: number of validation examples used to derive
            ``validation_steps`` (default 25000; the alternatives were
            15000 and 50000).

    All other arguments are forwarded to the project helpers unchanged.

    The last ``val_top_k_categorical_accuracy`` value of the fit history
    (or 0.0 when the history list is empty) is printed and passed to
    ``nni.report_final_result``.  Nothing is returned.
    """
    batch_size = get_batch_size(model_name, batch_size)
    iter_size = get_iter_size(model_name, iter_size)
    initial_lr = get_initial_lr(model_name, initial_lr)
    final_lr = get_final_lr(model_name, final_lr)
    optimizer = get_optimizer(model_name, optim_name, initial_lr, epsilon)
    weight_decay = get_weight_decay(model_name, weight_decay)

    # get training and validation data
    ds_train = get_dataset(dataset_dir, 'train', batch_size)
    ds_valid = get_dataset(dataset_dir, 'validation', batch_size)

    mirrored_strategy = tf.distribute.MirroredStrategy(
        cross_device_ops=tf.distribute.NcclAllReduce(num_packs=2))
    with mirrored_strategy.scope():
        model = get_training_model(model_name=model_name,
                                   dropout_rate=dropout_rate,
                                   optimizer=optimizer,
                                   label_smoothing=label_smoothing,
                                   use_lookahead=use_lookahead,
                                   iter_size=iter_size,
                                   weight_decay=weight_decay,
                                   gpus=NUM_GPU)

    num_workers = NUM_GPU if NUM_GPU > 0 else 1
    train_steps = int(num_train_samples / batch_size)
    val_steps = int(num_val_samples / batch_size)
    print(
        f"[INFO] Total Epochs:{epochs} Train Steps:{train_steps} Validate Steps: {val_steps} Workers:{num_workers} Batch size:{batch_size}"
    )

    his = model.fit(
        x=ds_train,
        steps_per_epoch=train_steps,
        validation_data=ds_valid,
        validation_steps=val_steps,
        callbacks=[
            get_lr_func(epochs, lr_sched, initial_lr, final_lr, NUM_GPU)
        ],
        epochs=epochs,
        verbose=2)

    # An empty history list (no completed epoch) is reported as 0.0.
    top_k_history = his.history['val_top_k_categorical_accuracy']
    final_acc = top_k_history[-1] if top_k_history else 0.
    print(f"Final acc:{final_acc}")
    nni.report_final_result(final_acc)
parser.add_argument('--epochs', type=int, default=200, help='number of epochs of training') parser.add_argument('--model', type=str, default='resnet20') parser.add_argument('--output_dir', type=str, default='./checkpoint/') parser.add_argument('--seed', type=int, default=1, help='random seed') args = parser.parse_args() torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) os.makedirs(args.output_dir, exist_ok=True) acc_best = 0 data_train_loader, data_test_loader = dataset.get_dataset(args) net = models.get_model(args.model, args).cuda() criterion = torch.nn.CrossEntropyLoss().cuda() optimizer = torch.optim.SGD(net.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4) lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer, [80, 160], gamma=0.1) def train(epoch): net.train() loss_list, batch_list = [], [] for i, (images, labels) in enumerate(data_train_loader): images, labels = Variable(images).cuda(), Variable(labels).cuda()
def _prepare_eval_inputs(batch):
    """Build the per-model input arrays for one batch of images.

    ``batch`` is indexed along its first axis and every sample is shifted
    and scaled with ``(x + 1) * 127.5`` before the Keras ``preprocess_input``
    functions are applied (presumably the samples are in [-1, 1] -- confirm
    against ``get_dataset``).  ``batch`` itself is never modified.

    Returns a 5-tuple of arrays, in evaluation order:
    ``(resnet, vgg, inception, flipped, inverted)``.
    """
    resnet_in, vgg_in, inc_in, flip_in, inv_in = [], [], [], [], []
    for sample in batch:
        pixels = (sample + 1.0) * (255.0 / 2.0)

        # cv2.resize returns a new array, so the preprocess functions never
        # touch ``pixels`` or the caller's batch.
        resnet_in.append(
            resnet_preprocess_input(cv2.resize(pixels, (224, 224))))
        vgg_in.append(vgg_preprocess_input(cv2.resize(pixels, (224, 224))))
        inc_in.append(
            inception_preprocess_input(cv2.resize(pixels, (299, 299))))

        # Horizontal flip of the sample as delivered (no rescale / resize).
        flip_in.append(cv2.flip(sample.copy(), 1))

        # Colour inversion: map to [0, 1], invert, scale to pixel range.
        inverted = (1 - (sample + 1.0) / 2.0) * 255.0
        inv_in.append(
            inception_preprocess_input(cv2.resize(inverted, (299, 299))))

    return tuple(
        np.asarray(items)
        for items in (resnet_in, vgg_in, inc_in, flip_in, inv_in))


def main():
    """Compare clean vs. adversarial accuracy across several classifiers.

    Adversarial examples are crafted against InceptionV3 with
    ``FastGradientMethod`` and evaluated on ResNet50, VGG19, InceptionV3,
    InceptionV3 on flipped inputs, and the user-supplied "inverse" model on
    colour-inverted inputs.  Validation batches 199..499 are processed; for
    each one the ten accuracies (five clean, then five adversarial) are
    printed and appended as one line to ``kot_fgsm_untarg.txt``.

    The function always terminates the interpreter via ``sys.exit`` once
    the Keras session has been cleared.
    """
    first_batch, stop_batch = 199, 500

    parser = argparse.ArgumentParser(description=DESCRIPTION)
    parser.add_argument('--dataset_dir', type=str,
                        default=config.DEFAULT_DATASET_DIR)
    parser.add_argument('--batch_size', type=int, default=20)
    parser.add_argument('--inv_model_file', type=str,
                        help='a saved model (.h5) file')
    args = parser.parse_args()
    config_keras_backend()

    # The flag has no default, so it is None when omitted; treat that the
    # same as a wrong extension instead of failing on None.endswith.
    if not args.inv_model_file or not args.inv_model_file.endswith('.h5'):
        sys.exit('model_file is not a .h5')

    try:
        inv_model = tf.keras.models.load_model(
            args.inv_model_file, compile=False,
            custom_objects={'AdamW': AdamW})
        inv_model.compile(optimizer='sgd',
                          loss='sparse_categorical_crossentropy',
                          metrics=['accuracy'])
        ds_validation = get_dataset(args.dataset_dir, 'validation',
                                    args.batch_size)

        # The optimizer is irrelevant here: the models are only evaluated.
        vgg_model = VGG19(include_top=True, weights='imagenet', classes=1000)
        vgg_model.compile(optimizer='sgd',
                          loss='sparse_categorical_crossentropy',
                          metrics=['accuracy'])
        inception_model = InceptionV3(include_top=True, weights='imagenet',
                                      classes=1000)
        inception_model.compile(optimizer='sgd',
                                loss='sparse_categorical_crossentropy',
                                metrics=['accuracy'])
        resnet_model = ResNet50(include_top=True, weights='imagenet',
                                classes=1000)
        resnet_model.compile(optimizer='sgd',
                             loss='sparse_categorical_crossentropy',
                             metrics=['accuracy'])

        # Same order as the tuple returned by _prepare_eval_inputs.
        evaluators = (resnet_model, vgg_model, inception_model,
                      inception_model, inv_model)

        batches = tfds.as_numpy(ds_validation)
        for iteration, (images, labels) in enumerate(batches):
            if iteration < first_batch:
                print('continuing')
                continue
            if iteration == stop_batch:
                break

            # One-hot labels -> class indices for the sparse loss.
            labels = np.argmax(labels, axis=1)
            adv_imgs = run_attack(False, 'FastGradientMethod',
                                  inception_model, images, labels,
                                  batch_size=args.batch_size,
                                  dataset='cifar', fgsm_epsilon=0.3,
                                  cwl2_confidence=0)

            clean_inputs = _prepare_eval_inputs(images)
            adv_inputs = _prepare_eval_inputs(adv_imgs)

            # evaluate() returns [loss, accuracy]; keep the accuracy.
            clean_acc = [
                model.evaluate(x=x, y=labels, verbose=0)[1]
                for model, x in zip(evaluators, clean_inputs)
            ]
            adv_acc = [
                model.evaluate(x=x, y=labels, verbose=0)[1]
                for model, x in zip(evaluators, adv_inputs)
            ]

            print(iteration)
            for clean, adv in zip(clean_acc, adv_acc):
                print(clean, adv)

            with open("kot_fgsm_untarg.txt", "a") as myfile:
                myfile.write(
                    ' '.join(str(acc) for acc in clean_acc + adv_acc) + '\n')
    finally:
        # Previously unreachable (it followed an unconditional exit()).
        clear_keras_session()

    # The original left through exit() on every path; keep terminating.
    sys.exit()
def train(model_name, dropout_rate, optim_name, epsilon, label_smoothing,
          use_lookahead, batch_size, iter_size, lr_sched, initial_lr,
          final_lr, weight_decay, epochs, iterations, dataset_dir, skip_eval,
          eval_checkpoint, run_on_hpu, measure_perf,
          extract_tensors_cfg_file_path, bfloat16, train_subset, val_subset):
    """Prepare data and train the model.

    Behaviour switches visible in this function:

    * ``run_on_hpu`` false: the model is built under a
      ``tf.distribute.MirroredStrategy`` scope.  Otherwise the
      ``TF_ENABLE_BF16_CONVERSION`` environment variable is set to
      ``'full'`` (``bfloat16`` true) or ``"false"`` before the model is
      built.
    * ``skip_eval``: no validation dataset is loaded and validation steps
      are 0.
    * ``iterations``: when truthy, overrides the default steps per epoch
      (``1281167 // batch_size``).
    * ``eval_checkpoint``: when not None, the weights are loaded, the model
      is evaluated on the validation data and the interpreter exits
      without training.
    * ``measure_perf``: adds ``KerasMeasurePerfCallback``.
    * ``extract_tensors_cfg_file_path``: when not None, trains for a single
      step/epoch on the input and target supplied by
      ``KerasTensorExtractionCallback``, without validation or shuffling.

    After training the model is saved to
    ``<config.SAVE_DIR>/<save_name>-model-final.h5``.

    Raises:
        ValueError: if ``eval_checkpoint`` is given together with
            ``skip_eval`` (there would be no validation data to evaluate).
    """
    if eval_checkpoint is not None and skip_eval:
        # Without this check evaluate() would be called with x=None.
        raise ValueError(
            'eval_checkpoint requires validation data; '
            'do not combine it with skip_eval')

    if not run_on_hpu:
        strategy = tf.distribute.MirroredStrategy()
        print('Number of devices: {}'.format(strategy.num_replicas_in_sync))

    batch_size = get_batch_size(model_name, batch_size)
    iter_size = get_iter_size(model_name, iter_size)
    initial_lr = get_initial_lr(model_name, initial_lr)
    final_lr = get_final_lr(model_name, final_lr)
    optimizer = get_optimizer(model_name, optim_name, initial_lr, epsilon)
    weight_decay = get_weight_decay(model_name, weight_decay)

    # get training and validation data
    ds_train = get_dataset(dataset_dir, train_subset, batch_size)
    ds_valid = None if skip_eval else get_dataset(
        dataset_dir, val_subset, batch_size)

    # instantiate training callbacks
    lrate = get_lr_func(epochs, lr_sched, initial_lr, final_lr)
    # A '.h5' model_name is a checkpoint path: keep only the part of its
    # file name before the first '.' and the first '-'.
    save_name = model_name if not model_name.endswith('.h5') else \
        os.path.split(model_name)[-1].split('.')[0].split('-')[0]
    # NOTE(review): Keras logs the training loss as 'loss'; 'train_loss'
    # would only matter if save_best_only were enabled -- confirm intent.
    model_ckpt = tf.keras.callbacks.ModelCheckpoint(
        os.path.join(config.SAVE_DIR, save_name) + '-ckpt-{epoch:03d}.h5',
        monitor='train_loss')

    if iterations:
        steps_per_epoch = iterations
        print(f"Changing steps per epoch to {steps_per_epoch}")
    else:
        steps_per_epoch = 1281167 // batch_size

    val_steps = 0 if skip_eval else 50000 // batch_size

    # build model and do training
    get_training_model_kwargs = {
        "model_name": model_name,
        "dropout_rate": dropout_rate,
        "optimizer": optimizer,
        "label_smoothing": label_smoothing,
        "use_lookahead": use_lookahead,
        "iter_size": iter_size,
        "weight_decay": weight_decay,
        "batch_size": batch_size
    }

    if not run_on_hpu:
        with strategy.scope():
            model = get_training_model(**get_training_model_kwargs)
    else:
        if bfloat16:
            # Bf16 conversion, full list
            os.environ['TF_ENABLE_BF16_CONVERSION'] = 'full'
        else:
            os.environ['TF_ENABLE_BF16_CONVERSION'] = "false"
        print("train: Set TF_ENABLE_BF16_CONVERSION: " +
              os.environ.get('TF_ENABLE_BF16_CONVERSION'))
        model = get_training_model(**get_training_model_kwargs)

    if eval_checkpoint is not None:
        # Evaluation-only mode: never falls through to training.
        model.load_weights(eval_checkpoint)
        results = model.evaluate(x=ds_valid, steps=val_steps)
        print("Test loss, Test acc:", results)
        exit()

    x = ds_train
    y = None
    callbacks = [lrate, model_ckpt]
    shuffle = True
    if measure_perf:
        callbacks += [KerasMeasurePerfCallback(model, batch_size)]

    if extract_tensors_cfg_file_path is not None:
        tensor_extraction_cb = KerasTensorExtractionCallback(
            model, extract_tensors_cfg_file_path)
        callbacks += [tensor_extraction_cb]
        # Single deterministic step on the callback-provided tensors.
        x = tensor_extraction_cb.get_input()
        y = tensor_extraction_cb.get_target()
        steps_per_epoch = 1
        epochs = 1
        ds_valid = None
        val_steps = 0
        shuffle = False

    model.fit(x=x,
              y=y,
              steps_per_epoch=steps_per_epoch,
              validation_data=ds_valid,
              validation_steps=val_steps,
              callbacks=callbacks,
              epochs=epochs,
              shuffle=shuffle)

    # training finished
    model.save('{}/{}-model-final.h5'.format(config.SAVE_DIR, save_name))
# Per-run log directory named after the start time of the run.
timestr = time.strftime("%Y.%m.%d-%H%M%S")
log_dir = "./logs/" + timestr
# makedirs also creates "./logs" on a fresh checkout, where a plain
# os.mkdir would fail; an already existing run directory still raises.
os.makedirs(log_dir)
args.output_dir = log_dir

logging.basicConfig(
    filename=log_dir + '/log',
    filemode="w",
    format="%(asctime)s %(name)s:%(levelname)s:%(message)s",
    # %m is the month; the previous "%d-%M-%Y" put the minute there.
    datefmt="%d-%m-%Y %H:%M:%S",
    level=logging.DEBUG)
logging.info(args)

accr = 0
accr_best = 0

print('==>load data')
# Only the second value returned by get_dataset is kept.
_, data_test_loader = dataset.get_dataset(args)

generator = models.generator.Generator(args).cuda()
generator = nn.DataParallel(generator, device_ids=args.gpus)

# The teacher is restored from ./checkpoint/<t_model>_<dataset>.pth and put
# in eval mode before being wrapped.
teacher = models.get_model(args.t_model, args).cuda()
teacher.load_state_dict(
    torch.load('./checkpoint/' + args.t_model + '_' + args.dataset + '.pth'))
teacher.eval()
teacher = nn.DataParallel(teacher, device_ids=args.gpus)

criterion = torch.nn.CrossEntropyLoss().cuda()

# Student network.
net = models.get_model(args.s_model, args).cuda()
net = nn.DataParallel(net, device_ids=args.gpus)
acc_count += correct.sum().item() acc = acc_count / (len(data) * data.batch_size) * 100 print('----------Acc: {}%----------'.format(acc)) return sum(loss_list) / len(loss_list), acc if __name__ == '__main__': # == Setting == device = torch.device('cpu') print('Using', device) # == Data == data_name = 'mnist' print('Data using: {}'.format(data_name)) train_data, test_data = get_dataset(data_name) # == Model == model = LeNet() model = model.to(device) # == optimizer == criterion = torch.nn.CrossEntropyLoss().to(device) optimizer = torch.optim.Adam(model.parameters(), lr=0.0001) # == Main Loop == max_acc = 0 max_epoch = 30 scheduler = StepLR(optimizer=optimizer, step_size=10) # first epoch
def main():
    """Parse the command line, then train or evaluate a ViT model.

    Steps, in order: argument parsing; optional weights directory and
    bfloat16 policy; HPU module loading and recipe-cache handling;
    determinism and seeding; distribution strategy selection; dataset
    loading; model construction and compilation under the strategy scope;
    then either evaluation of ``--evaluate_checkpoint_path`` (followed by
    ``exit()``) or training with optional resume from a checkpoint.  The
    final model is saved by rank 0 (or by the only process when not
    distributed).

    Raises:
        SystemExit: with a non-zero status if the model name is not one of
            the supported ViT variants.
    """
    parser = argparse.ArgumentParser(description=DESCRIPTION)
    parser.add_argument('--dataset', '--dataset_dir', metavar='PATH',
                        default=config.DEFAULT_DATASET_DIR,
                        help='Dataset directory.')
    parser.add_argument('--optimizer', default='sgd',
                        choices=['sgd', 'adam', 'rmsprop'],
                        help='Optimizer.')
    parser.add_argument('-d', '--dtype', default='fp32',
                        choices=['fp32', 'bf16'], help='Data type.')
    parser.add_argument('--batch_size', type=int, default=32,
                        help='Global batch size.')
    parser.add_argument('--lr_sched', default='WarmupCosine',
                        choices=['linear', 'exp', 'steps', 'constant',
                                 'WarmupCosine'],
                        help='Learning rate scheduler.')
    parser.add_argument('--initial_lr', type=float, default=6e-2,
                        help='Initial learning rate.')
    parser.add_argument('--final_lr', type=float, default=1e-5,
                        help='Final learning rate.')
    parser.add_argument('--warmup_steps', type=int, default=4000,
                        help='Warmup steps.')
    parser.add_argument('--epochs', type=int, default=10,
                        help='Total number of epochs for training.')
    parser.add_argument('--steps_per_epoch', type=int,
                        help='Number of steps for training per epoch, overrides default value.')
    parser.add_argument('--validation_steps', type=int,
                        help='Number of steps for validation, overrides default value.')
    parser.add_argument('--model', default='ViT-B_16',
                        choices=['ViT-B_16', 'ViT-L_16', 'ViT-B_32',
                                 'ViT-L_32'],
                        help='Model.')
    parser.add_argument('--train_subset', default='train',
                        help='Pattern to detect train subset in dataset directory.')
    parser.add_argument('--val_subset', default='validation',
                        help='Pattern to detect validation subset in dataset directory.')
    parser.add_argument('--grad_accum_steps', type=int, default=8,
                        help='Gradient accumulation steps.')
    parser.add_argument('--resume_from_checkpoint_path', metavar='PATH',
                        help='Path to checkpoint to start from.')
    parser.add_argument('--resume_from_epoch', metavar='EPOCH_INDEX',
                        type=int, default=0, help='Initial epoch index.')
    parser.add_argument('--evaluate_checkpoint_path', metavar='PATH',
                        help='Checkpoint path for evaluating the model on --val_subset')
    parser.add_argument('--weights_path', metavar='PATH',
                        help='Path to weights cache directory. ~/.keras is used if not set.')
    parser.add_argument('--deterministic', action='store_true', default=False,
                        help='Enable deterministic behavior, this will also disable data augmentation. --seed must be set.')
    parser.add_argument('--seed', type=int,
                        help='Seed to be used by random functions.')
    parser.add_argument('--device', default='HPU', choices=['CPU', 'HPU'],
                        help='Device type.')
    parser.add_argument('--distributed', action='store_true', default=False,
                        help='Enable distributed training.')
    parser.add_argument('--base_tf_server_port', type=int, default=7850,
                        help='Rank 0 port used by tf.distribute.')
    parser.add_argument('--save_summary_steps', type=int, default=0,
                        help='Steps between saving summaries to TensorBoard.')
    parser.add_argument('--recipe_cache', default='/tmp/vit_recipe_cache',
                        help='Path to recipe cache directory. Set to empty to disable recipe cache. Externally set \'TF_RECIPE_CACHE_PATH\' will override this setting.')
    parser.add_argument(
        '--dump_config',
        help='Side-by-side config file. Internal, do not use.')
    args = parser.parse_args()

    # Only rank 0 (or the single process) prints, clears caches and saves.
    def is_chief():
        return not args.distributed or comm_rank() == 0

    if args.weights_path is not None:
        config.WEIGHTS_DIR = args.weights_path

    if args.dtype == 'bf16':
        tf.keras.mixed_precision.set_global_policy('mixed_bfloat16')

    # NOTE(review): the recipe-cache handling is assumed to belong to the
    # HPU-only branch; the flattened original does not show its nesting.
    if args.device == 'HPU':
        if args.distributed:
            os.environ['TF_HCCL_MEMORY_ALLOWANCE_MB'] = '500'
        from habana_frameworks.tensorflow import load_habana_module
        from habana_frameworks.tensorflow.ops.layer_norm import HabanaLayerNormalization
        load_habana_module()
        tf.keras.layers.LayerNormalization = HabanaLayerNormalization

        # Handle recipe caching: an externally set variable wins.
        recipe_cache = args.recipe_cache
        if 'TF_RECIPE_CACHE_PATH' not in os.environ and recipe_cache:
            os.environ['TF_RECIPE_CACHE_PATH'] = recipe_cache

        # Clear previous recipe cache.
        if is_chief():
            if os.path.exists(recipe_cache) and os.path.isdir(recipe_cache):
                import shutil
                shutil.rmtree(recipe_cache)
        # Wait for rank 0 to remove cache.
        if args.distributed:
            from mpi4py import MPI
            MPI.COMM_WORLD.Barrier()

    # Handle determinism.
    config.DETERMINISTIC = args.deterministic
    config.SEED = args.seed
    if args.deterministic:
        assert args.seed is not None, "Deterministic behavior require seed to be set."
        tf.config.threading.set_inter_op_parallelism_threads(1)
        tf.config.threading.set_intra_op_parallelism_threads(1)
        os.environ['TF_DETERMINISTIC_OPS'] = '1'
        config.DATA_AUGMENTATION = False
    if args.seed is not None:
        random.seed(args.seed)
        np.random.seed(args.seed)
        tf.random.set_seed(args.seed)

    # Handle distribution strategy.
    if args.distributed:
        tf_distribute_config(args.base_tf_server_port)
        if args.device == 'HPU':
            os.environ['HBN_TF_REGISTER_DATASETOPS'] = '1'
            from habana_frameworks.tensorflow.distribute import HPUStrategy
            strategy = HPUStrategy()
        else:
            strategy = tf.distribute.MultiWorkerMirroredStrategy()
    else:
        strategy = tf.distribute.OneDeviceStrategy(f'device:{args.device}:0')

    if is_chief():
        print('Number of devices: {}'.format(strategy.num_replicas_in_sync))

    num_classes = 1000
    batch_size = args.batch_size
    nb_epoch = args.epochs
    resume_from_checkpoint_path = args.resume_from_checkpoint_path
    resume_from_epoch = args.resume_from_epoch
    initial_lr = args.initial_lr
    final_lr = args.final_lr
    model_name = args.model

    ds_train = get_dataset(args.dataset, args.train_subset, batch_size,
                           is_training=True, distributed=args.distributed)
    ds_valid = get_dataset(args.dataset, args.val_subset, batch_size, False,
                           distributed=args.distributed)

    if args.dump_config is not None:
        vit.CONFIG_B['dropout'] = 0.0
        vit.CONFIG_L['dropout'] = 0.0

    # Load our model
    with strategy.scope():
        # All variants are built with identical arguments.
        vit_builders = {
            'ViT-B_16': vit.vit_b16,
            'ViT-L_16': vit.vit_l16,
            'ViT-B_32': vit.vit_b32,
            'ViT-L_32': vit.vit_l32,
        }
        if model_name not in vit_builders:
            # Previously printed the message and left with exit(0), which
            # reported success for a failed run.
            raise SystemExit(
                "Model is not supported, please use either ViT-B_16 or ViT-L_16 or ViT-B_32 or ViT-L_32")
        model = vit_builders[model_name](
            image_size=384,
            activation='softmax',
            pretrained=True,
            include_top=True,
            pretrained_top=False,
            classes=num_classes,
            weights="imagenet21k")

        optimizer = get_optimizer(
            args.optimizer, initial_lr,
            accumulation_steps=args.grad_accum_steps, epsilon=1e-2)
        model.compile(optimizer=optimizer, loss='categorical_crossentropy',
                      metrics=['accuracy'], run_eagerly=False)

        # Start training
        steps_per_epoch = 1281167 // batch_size
        if args.steps_per_epoch is not None:
            steps_per_epoch = args.steps_per_epoch
        validation_steps = 50000 // batch_size
        if args.validation_steps is not None:
            validation_steps = args.validation_steps

        total_steps = nb_epoch * steps_per_epoch
        resume_step = resume_from_epoch * steps_per_epoch

        lrate = get_lr_func(nb_epoch, args.lr_sched, initial_lr, final_lr,
                            args.warmup_steps, resume_step, total_steps)

        save_name = model_name if not model_name.endswith('.h5') else \
            os.path.split(model_name)[-1].split('.')[0].split('-')[0]
        # NOTE(review): Keras logs the training loss as 'loss';
        # 'train_loss' would only matter with save_best_only -- confirm.
        model_ckpt = tf.keras.callbacks.ModelCheckpoint(
            os.path.join(config.SAVE_DIR, save_name) + '-ckpt-{epoch:03d}.h5',
            monitor='train_loss')
        callbacks = [lrate, model_ckpt]

        if args.save_summary_steps > 0:
            callbacks += [TensorBoardWithHParamsV2(
                vars(args), log_dir=config.LOG_DIR,
                update_freq=args.save_summary_steps)]
            callbacks += [ExamplesPerSecondKerasHookV2(
                output_dir=config.LOG_DIR,
                every_n_steps=args.save_summary_steps,
                batch_size=args.batch_size)]

        if args.evaluate_checkpoint_path is not None:
            # Evaluation-only mode: never falls through to training.
            model.load_weights(args.evaluate_checkpoint_path)
            results = model.evaluate(x=ds_valid, steps=validation_steps)
            print("Test loss, Test acc:", results)
            exit()

        if ((resume_from_epoch is not None)
                and (resume_from_checkpoint_path is not None)):
            model.load_weights(resume_from_checkpoint_path)

        with dump_callback(args.dump_config):
            model.fit(x=ds_train, y=None,
                      steps_per_epoch=steps_per_epoch,
                      callbacks=callbacks,
                      initial_epoch=resume_from_epoch,
                      epochs=nb_epoch,
                      shuffle=not args.deterministic,
                      verbose=1 if not args.distributed else comm_rank() == 0,
                      validation_data=(ds_valid, None),
                      validation_steps=validation_steps,
                      )

        if is_chief():
            model.save(f'{config.SAVE_DIR}/{save_name}-model-final.h5')
def train(model_name, dropout_rate, optim_name, epsilon, label_smoothing,
          use_lookahead, batch_size, iter_size, lr_sched, initial_lr,
          final_lr, weight_decay, epochs, dataset_dir, cross_device_ops,
          num_packs, tf_gpu_thread_mode,
          num_train_samples=642289, num_val_samples=25000):
    """Prepare data and train the model; return accuracy and elapsed hours.

    Args:
        cross_device_ops: ``"HierarchicalCopyAllReduce"`` or
            ``"NcclAllReduce"`` selects that all-reduce implementation
            (built with ``num_packs``); any other value uses the default
            ``MirroredStrategy``.
        num_packs: forwarded to the selected cross-device op.
        tf_gpu_thread_mode: when one of ``"global"``, ``"gpu_private"`` or
            ``"gpu_shared"``, it is exported as ``TF_GPU_THREAD_MODE``;
            other values are ignored.
        num_train_samples: number of training examples used to derive
            ``steps_per_epoch``.  The default (642289) is the value the
            original code labelled "500 modification"; its commented-out
            alternatives were 383690 ("300") and 1281167 ("1000 classes").
        num_val_samples: number of validation examples used to derive
            ``validation_steps`` (default 25000; the alternatives were
            15000 and 50000).

    All other arguments are forwarded to the project helpers unchanged.

    Returns:
        ``(acc, fit_time)``: the last ``val_top_k_categorical_accuracy``
        of the fit history (0.0 if the list is empty) and the wall-clock
        time in hours measured from function entry, so it includes data
        and model setup.
    """
    start = time.time()

    if tf_gpu_thread_mode in ["global", "gpu_private", "gpu_shared"]:
        os.environ['TF_GPU_THREAD_MODE'] = tf_gpu_thread_mode

    batch_size = get_batch_size(model_name, batch_size)
    iter_size = get_iter_size(model_name, iter_size)
    initial_lr = get_initial_lr(model_name, initial_lr)
    final_lr = get_final_lr(model_name, final_lr)
    optimizer = get_optimizer(model_name, optim_name, initial_lr, epsilon)
    weight_decay = get_weight_decay(model_name, weight_decay)

    # get training and validation data
    ds_train = get_dataset(dataset_dir, 'train', batch_size)
    ds_valid = get_dataset(dataset_dir, 'validation', batch_size)

    if cross_device_ops == "HierarchicalCopyAllReduce":
        mirrored_strategy = tf.distribute.MirroredStrategy(
            cross_device_ops=tf.distribute.HierarchicalCopyAllReduce(
                num_packs=num_packs))
    elif cross_device_ops == "NcclAllReduce":
        mirrored_strategy = tf.distribute.MirroredStrategy(
            cross_device_ops=tf.distribute.NcclAllReduce(num_packs=num_packs))
    else:
        mirrored_strategy = tf.distribute.MirroredStrategy()

    with mirrored_strategy.scope():
        model = get_training_model(model_name=model_name,
                                   dropout_rate=dropout_rate,
                                   optimizer=optimizer,
                                   label_smoothing=label_smoothing,
                                   use_lookahead=use_lookahead,
                                   iter_size=iter_size,
                                   weight_decay=weight_decay,
                                   gpus=NUM_GPU)

    num_workers = NUM_GPU if NUM_GPU > 0 else 1
    train_steps = int(num_train_samples / batch_size)
    val_steps = int(num_val_samples / batch_size)
    print(
        f"[INFO] Total Epochs:{epochs} Train Steps:{train_steps} Validate Steps: {val_steps} Workers:{num_workers} Batch size:{batch_size}"
    )

    his = model.fit(
        x=ds_train,
        steps_per_epoch=train_steps,
        validation_data=ds_valid,
        validation_steps=val_steps,
        callbacks=[
            get_lr_func(epochs, lr_sched, initial_lr, final_lr, NUM_GPU)
        ],
        epochs=epochs,
        verbose=2)

    end = time.time()
    fit_time = (end - start) / 3600.0

    # An empty history list (no completed epoch) is reported as 0.0.
    top_k_history = his.history['val_top_k_categorical_accuracy']
    acc = top_k_history[-1] if top_k_history else 0.
    print(f"[TRIAL END] time: {fit_time} {his.history}")
    return acc, fit_time
def _inception_inputs_from_batch(batch):
    """Rescale, resize to 299x299 and Inception-preprocess every sample.

    Each sample is mapped with ``(x + 1) * 127.5`` first (presumably from
    [-1, 1] to pixel range -- confirm against ``get_dataset``).  ``batch``
    is not modified.
    """
    return np.asarray([
        inception_preprocess_input(
            cv2.resize((sample + 1.0) * (255.0 / 2.0), (299, 299)))
        for sample in batch
    ])


def main():
    """Measure the perturbation size of a CarliniLInf attack on InceptionV3.

    The first 31 validation batches are skipped.  For the next batch,
    adversarial examples are generated and the L1, L2 and L-inf distances
    between the preprocessed clean and adversarial images are printed
    (batch means, then the per-sample L1 list).

    NOTE(review): processing stops after that single batch.  This follows
    the un-commented debug ``exit()`` of the original, whose position
    (inside the loop) had to be inferred from the flattened source --
    confirm.

    The function always terminates the interpreter once the Keras session
    has been cleared.
    """
    first_batch, stop_batch = 31, 1000

    parser = argparse.ArgumentParser(description=DESCRIPTION)
    parser.add_argument('--dataset_dir', type=str,
                        default=config.DEFAULT_DATASET_DIR)
    parser.add_argument('--batch_size', type=int, default=5)
    args = parser.parse_args()
    config_keras_backend()

    try:
        ds_validation = get_dataset(args.dataset_dir, 'validation',
                                    args.batch_size)

        # The optimizer is irrelevant here: the model is only attacked.
        inception_model = InceptionV3(include_top=True, weights='imagenet',
                                      classes=1000)
        inception_model.compile(optimizer='sgd',
                                loss='sparse_categorical_crossentropy',
                                metrics=['accuracy'])

        batches = tfds.as_numpy(ds_validation)
        for iteration, (images, labels) in enumerate(batches):
            if iteration < first_batch:
                print('continuing')
                continue
            if iteration == stop_batch:
                break

            # One-hot labels -> class indices.
            labels = np.argmax(labels, axis=1)
            # Use the parsed batch size (default 5) rather than a second,
            # hard-coded 5 that could drift from --batch_size.
            adv_imgs = run_attack(False, 'CarliniLInfMethod',
                                  inception_model, images, labels,
                                  batch_size=args.batch_size,
                                  dataset='cifar', fgsm_epsilon=0.3,
                                  cwl2_confidence=0)

            # NaNs are replaced before the distances are taken.
            inc_imgs = np.nan_to_num(_inception_inputs_from_batch(images))
            adv_inc_imgs = np.nan_to_num(
                _inception_inputs_from_batch(adv_imgs))

            deltas = [
                np.subtract(adv.flatten(), clean.flatten())
                for adv, clean in zip(adv_inc_imgs, inc_imgs)
            ]
            norm_diffs_1 = [np.linalg.norm(delta, 1) for delta in deltas]
            norm_diffs_2 = [np.linalg.norm(delta, 2) for delta in deltas]
            norm_diffs_inf = [
                np.linalg.norm(delta, np.inf) for delta in deltas
            ]

            print(iteration)
            print(np.mean(norm_diffs_1), np.mean(norm_diffs_2),
                  np.mean(norm_diffs_inf))
            print(norm_diffs_1)
            break
    finally:
        # Previously unreachable (it followed an unconditional exit()).
        clear_keras_session()

    # The original left through exit() on every path; keep terminating.
    raise SystemExit