def train(epoch, batch_idx, model, data, labels, optimizer, x_advs=None):
    model.train()
    optimizer.zero_grad()

    # Generate cross-entropy loss for training
    logits = model(data)
    preds = logits.max(1)[1]
    loss1 = gen_adv_loss(logits, labels, mean=True)

    # add adversarial training loss
    if x_advs is not None:
        # choose source of adversarial examples at random
        # (for ensemble adversarial training)
        idx = np.random.randint(len(x_advs))
        logits_adv = model(x_advs[idx])
        loss2 = gen_adv_loss(logits_adv, labels, mean=True)
        loss = 0.5 * (loss1 + loss2)
    else:
        loss2 = torch.zeros(loss1.size())
        loss = loss1

    loss.backward()
    optimizer.step()

    if batch_idx % EVAL_FREQUENCY == 0:
        print('Step: {} (epoch: {})\tLoss: {:.6f}<=({:.6f}, {:.6f})\tError: {:.2f}%'.format(
            batch_idx, epoch + 1, loss.item(), loss1.item(), loss2.item(),
            error_rate(preds, labels)))
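# The PyTorch step above assumes two helpers defined elsewhere in the
# repository. The bodies below are a minimal sketch of plausible
# implementations, not the originals (note that one TF variant further
# down unpacks error_rate as a 3-tuple, so signatures differ per variant).
import torch.nn.functional as F

def gen_adv_loss(logits, labels, mean=True):
    # cross-entropy on raw logits against integer class labels
    return F.cross_entropy(logits, labels,
                           reduction='mean' if mean else 'sum')

def error_rate(preds, labels):
    # percentage of mini-batch examples whose argmax prediction
    # disagrees with the label
    wrong = (preds != labels).float().sum().item()
    return 100.0 * wrong / labels.size(0)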
def tf_train(x, y, model, X_train, Y_train, generator,
             x_advs=None, benign=None, cross_lip=None):
    old_vars = set(tf.global_variables())
    train_size = Y_train.shape[0]

    # Generate cross-entropy loss for training
    logits = model(x)
    preds = K.softmax(logits)
    l1 = gen_adv_loss(logits, y, mean=True)

    # add adversarial training loss
    if x_advs is not None:
        idx = tf.placeholder(dtype=np.int32)
        logits_adv = model(tf.stack(x_advs)[idx])
        l2 = gen_adv_loss(logits_adv, y, mean=True)
        if benign == 0:
            loss = l2
        elif benign == 1:
            loss = 0.5 * (l1 + l2)
        else:
            # the original left `loss` undefined for other values of `benign`
            raise ValueError('benign must be 0 (adversarial loss only) or '
                             '1 (mixed loss) when x_advs is given')
    else:
        l2 = tf.constant(0)
        loss = l1

    optimizer = tf.train.AdamOptimizer().minimize(loss)
    saver = tf.train.Saver(list(set(tf.global_variables()) - old_vars))

    # Run all the initializers to prepare the trainable parameters.
    K.get_session().run(tf.initialize_variables(
        set(tf.global_variables()) - old_vars))
    start_time = time.time()
    print('Initialized!')

    # Loop through training steps.
    num_steps = int(NUM_EPOCHS * train_size + BATCH_SIZE - 1) // BATCH_SIZE
    step = 0
    training_loss = 0
    epoch_count = 0
    step_old = 0
    for (batch_data, batch_labels) \
            in generator.flow(X_train, Y_train, batch_size=BATCH_SIZE):
        if len(batch_data) < BATCH_SIZE:
            k = BATCH_SIZE - len(batch_data)
            batch_data = np.concatenate([batch_data, X_train[0:k]])
            batch_labels = np.concatenate([batch_labels, Y_train[0:k]])

        feed_dict = {x: batch_data, y: batch_labels, K.learning_phase(): 1}

        # choose source of adversarial examples at random
        # (for ensemble adversarial training)
        if x_advs is not None:
            feed_dict[idx] = np.random.randint(len(x_advs))

        # Run the graph
        _, curr_loss, curr_l1, curr_l2, curr_preds, _ = \
            K.get_session().run([optimizer, loss, l1, l2, preds] + [model.updates],
                                feed_dict=feed_dict)

        training_loss += curr_loss
        epoch = float(step) * BATCH_SIZE / train_size
        if epoch >= epoch_count:
            epoch_count += 1
            elapsed_time = time.time() - start_time
            start_time = time.time()
            print('Step %d (epoch %.2f), %.2f s' %
                  (step, float(step) * BATCH_SIZE / train_size, elapsed_time))
            # guard against division by zero on the very first step
            print('Training loss: %.3f' % (training_loss / max(step - step_old, 1)))
            training_loss = 0
            step_old = step
            print('Minibatch loss: %.3f (%.3f, %.3f)' % (curr_loss, curr_l1, curr_l2))
            _, _, minibatch_error = error_rate(curr_preds, batch_labels)
            print('Minibatch error: %.1f%%' % minibatch_error)
            # if epoch % 10 == 0 or (step == (num_steps - 1)):
            #     save_path = saver.save(K.get_session(), "/tmp/model.ckpt")
            #     save_model(model, 'tmp/model.ckpt')
            #     print("Model saved in file: %s" % 'model.ckpt')
            sys.stdout.flush()

        step += 1
        if step == num_steps:
            break
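# Every tf_train variant in this file calls gen_adv_loss on raw logits.
# A minimal sketch of a compatible TF1 definition, assuming one-hot
# labels; the repository's own version may differ (e.g. label smoothing,
# or using the model's most-confident label instead of y).
def gen_adv_loss(logits, y, mean=True):
    ce = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
    return tf.reduce_mean(ce) if mean else tf.reduce_sum(ce)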
def tf_train(graph, x, y, data, model_fn, train_mode=None, x_advs=None):
    old_vars = set(tf.all_variables())

    if x_advs is not None:
        # ensemble adversarial training
        idx = tf.placeholder(tf.int32)
        x_adv = tf.stack(x_advs)[idx]

        # Augmentation is only applied to the clean image
        def aug_1():
            print("Augmented")
            return augmentX(x, FLAGS.AUG_RATIO), x_adv, augmentY(y, FLAGS.AUG_RATIO)

        def aug_2():
            return x, x_adv, y

        # in test mode no augmentation is needed, so aug_2() applies
        if train_mode is None:
            aug_X = x
            X_adv = x_adv
            aug_Y = y
        else:
            aug_X, X_adv, aug_Y = tf.cond(tf.equal(train_mode, True), aug_1, aug_2)

        inputs = tf.concat([aug_X, X_adv], 0)
        labels = tf.concat([aug_Y, y], 0)
    else:
        inputs = x
        labels = y

    # Generate cross-entropy loss for training
    if x_advs is not None:
        logits, model = model_fn(inputs)
    else:
        logits, model = model_fn(inputs, train_mode)
    preds = tf.nn.softmax(logits)
    loss = gen_adv_loss(logits, labels, mean=True)
    loss *= 16  # fixed loss-scaling factor

    if FLAGS.REG_SCALE is not None:
        # L2 weight regularization
        regularize = tf.contrib.layers.l2_regularizer(FLAGS.REG_SCALE)
        params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
        reg_term = sum([regularize(param) for param in params])
        loss += reg_term

    learning_rate = 1e-5
    momentum = 0.9
    optimizer = tf.train.MomentumOptimizer(
        learning_rate, momentum, use_nesterov=True).minimize(loss)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.InteractiveSession(config=config, graph=graph)
    sess.run(tf.global_variables_initializer())
    if model.name == 'resnet':
        # pass the checkpoint directory, not the checkpoint file
        model.tf_load(sess, "./resnet18/checkpoints/model/")

    graph_dict = {}
    train_size = 1000000  # hard-coded training-set size
    start_time = time.time()
    print('Initialized!')

    # Loop through training steps.
    num_steps = int(FLAGS.NUM_EPOCHS * train_size + FLAGS.BATCH_SIZE - 1) // FLAGS.BATCH_SIZE
    print('Number of iterations: %d' % num_steps)
    print('Number of epochs: %d' % FLAGS.NUM_EPOCHS)
    print('Batch size: %d' % FLAGS.BATCH_SIZE)

    step = 0
    while step < num_steps:
        batch_data, batch_labels = data.next_train_batch(FLAGS.BATCH_SIZE)
        fetches = [optimizer, loss, preds, labels, logits]
        feed_dict = {x: batch_data, y: batch_labels, train_mode: True}

        # choose source of adversarial examples at random
        # (for ensemble adversarial training)
        if x_advs is not None:
            feed_dict[idx] = np.random.randint(len(x_advs))
            graph_dict["idx"] = idx

        # Run the graph
        _, curr_loss, curr_preds, curr_labels, curr_logits = \
            sess.run(fetches=fetches, feed_dict=feed_dict)

        if step % FLAGS.EVAL_FREQUENCY == 0:
            elapsed_time = time.time() - start_time
            start_time = time.time()
            print('Step %d (epoch %.2f), %.2f s' %
                  (step, float(step) * FLAGS.BATCH_SIZE / train_size, elapsed_time))
            print('Minibatch loss: %.3f' % curr_loss)
            print('Minibatch error: %.1f%%' % error_rate(curr_preds, curr_labels))
            sys.stdout.flush()
        step += 1

    if model.name == 'resnet':
        model.tf_save(sess, "./models/res_check0901/resnet18.ckpt-0901")
    else:
        model.save_npy(sess, npy_path="./models/" + model.name + "-save.npy")

    graph_dict["x"] = x
    graph_dict["y"] = y
    graph_dict["inputs"] = inputs
    graph_dict["labels"] = labels
    graph_dict["train_mode"] = train_mode
    graph_dict["logits"] = logits
    graph_dict["preds"] = preds
    graph_dict["optimizer"] = optimizer
    graph_dict["loss"] = loss

    return sess, graph_dict
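# A plausible way to wire up the graph-mode trainer above for ensemble
# adversarial training. `fgsm_attack`, `static_models`, `model_fn`,
# `data`, and the input shape are hypothetical names for illustration,
# not definitions taken from this file.
graph = tf.Graph()
with graph.as_default():
    x = tf.placeholder(tf.float32, [FLAGS.BATCH_SIZE, 32, 32, 3])
    y = tf.placeholder(tf.float32, [FLAGS.BATCH_SIZE, FLAGS.NUM_CLASSES])
    train_mode = tf.placeholder(tf.bool, name="train_mode")
    # one symbolic adversarial example per held-out source model
    x_advs = [fgsm_attack(m, x, y, eps=0.1) for m in static_models]
    sess, graph_dict = tf_train(graph, x, y, data, model_fn,
                                train_mode=train_mode, x_advs=x_advs)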
def tf_train(x, y, model, X_train, Y_train, generator, x_advs=None):
    old_vars = set(tf.all_variables())
    train_size = Y_train.shape[0]

    # Generate cross-entropy loss for training
    logits = model(x)
    preds = K.softmax(logits)
    l1 = gen_adv_loss(logits, y, mean=True)

    # add adversarial training loss
    if x_advs is not None:
        idx = tf.placeholder(dtype=np.int32)
        logits_adv = model(tf.stack(x_advs)[idx])
        l2 = gen_adv_loss(logits_adv, y, mean=True)
        loss = 0.5 * (l1 + l2)
    else:
        l2 = tf.constant(0)
        loss = l1

    optimizer = tf.train.AdamOptimizer().minimize(loss)

    # Run all the initializers to prepare the trainable parameters.
    K.get_session().run(
        tf.initialize_variables(set(tf.all_variables()) - old_vars))
    start_time = time.time()
    print('Initialized!')

    # Loop through training steps.
    num_steps = int(FLAGS.NUM_EPOCHS * train_size + FLAGS.BATCH_SIZE - 1) // FLAGS.BATCH_SIZE
    step = 0
    for (batch_data, batch_labels) \
            in generator.flow(X_train, Y_train, batch_size=FLAGS.BATCH_SIZE):
        if len(batch_data) < FLAGS.BATCH_SIZE:
            k = FLAGS.BATCH_SIZE - len(batch_data)
            batch_data = np.concatenate([batch_data, X_train[0:k]])
            batch_labels = np.concatenate([batch_labels, Y_train[0:k]])

        feed_dict = {x: batch_data, y: batch_labels, K.learning_phase(): 1}

        # choose source of adversarial examples at random
        # (for ensemble adversarial training)
        if x_advs is not None:
            feed_dict[idx] = np.random.randint(len(x_advs))

        # Run the graph
        _, curr_loss, curr_l1, curr_l2, curr_preds, _ = \
            K.get_session().run([optimizer, loss, l1, l2, preds] + [model.updates],
                                feed_dict=feed_dict)

        if step % EVAL_FREQUENCY == 0:
            elapsed_time = time.time() - start_time
            start_time = time.time()
            print('Step %d (epoch %.2f), %.2f s' %
                  (step, float(step) * FLAGS.BATCH_SIZE / train_size, elapsed_time))
            print('Minibatch loss: %.3f (%.3f, %.3f)' % (curr_loss, curr_l1, curr_l2))
            print('Minibatch error: %.3f%%' % error_rate(curr_preds, batch_labels))
            sys.stdout.flush()
        step += 1
        if step == num_steps:
            break
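# Ensemble adversarial training feeds tf_train a list of symbolic
# adversarial tensors built against pre-trained "static" source models.
# A minimal one-step FGSM sketch under that assumption; the helper name
# and eps value are illustrative, and gen_adv_loss is assumed to be the
# cross-entropy used above.
def fgsm_symbolic(model, x, y, eps=0.3):
    logits = model(x)
    loss = gen_adv_loss(logits, y, mean=True)
    # perturb the input along the sign of the loss gradient
    grad, = tf.gradients(loss, x)
    # stop_gradient so training does not backpropagate through the attack
    return tf.stop_gradient(x + eps * tf.sign(grad))

# e.g. x_advs = [fgsm_symbolic(m, x, y) for m in pre_trained_models]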
def tf_train(x, y, model, X_train, Y_train, generator, model_name, x_advs=None):
    old_vars = set(tf.global_variables())
    train_size = Y_train.shape[0]

    # Generate cross-entropy loss for training
    logits = model(x)
    preds = K.softmax(logits)
    l1 = gen_adv_loss(logits, y, mean=True)

    # add adversarial training loss
    if x_advs is not None:
        idx = tf.placeholder(dtype=tf.int32)
        x_adv_chosen = tf.stack(x_advs)[idx]
        logits_adv = model(x_adv_chosen)
        preds_adv = K.softmax(logits_adv)
        l2 = gen_adv_loss(logits_adv, y, mean=True)
        loss = 0.5 * (l1 + l2)
    else:
        l2 = tf.constant(0)
        loss = l1

    # add regularization loss for layer weights if it exists
    if len(model.losses) != 0:
        loss = loss + tf.add_n(model.losses)

    optimizer = tf.train.AdamOptimizer(0.001).minimize(loss)

    # Run all the initializers to prepare the trainable parameters.
    K.get_session().run(tf.initialize_variables(
        set(tf.global_variables()) - old_vars))
    start_time = time.time()
    print('Initialized!')

    num_steps = int(FLAGS.NUM_EPOCHS * train_size + FLAGS.BATCH_SIZE - 1) // FLAGS.BATCH_SIZE
    step = 0
    for (batch_data, batch_labels) \
            in generator.flow(X_train, Y_train, batch_size=FLAGS.BATCH_SIZE):
        if len(batch_data) < FLAGS.BATCH_SIZE:
            k = FLAGS.BATCH_SIZE - len(batch_data)
            batch_data = np.concatenate([batch_data, X_train[0:k]])
            batch_labels = np.concatenate([batch_labels, Y_train[0:k]])

        feed_dict = {x: batch_data, y: batch_labels, K.learning_phase(): 1}
        if x_advs is not None:
            feed_dict[idx] = np.random.randint(len(x_advs))

        # Run the graph
        _, curr_loss, curr_l1, curr_l2, curr_preds, _ = \
            K.get_session().run([optimizer, loss, l1, l2, preds] + [model.updates],
                                feed_dict=feed_dict)

        if step % EVAL_FREQUENCY == 0:
            elapsed_time = time.time() - start_time
            start_time = time.time()
            with open(model_name + '_log.txt', 'a') as log:
                log.write('Step %d (epoch %.2f), %.2f s \n' %
                          (step, float(step) * FLAGS.BATCH_SIZE / train_size, elapsed_time))
                log.write('Minibatch loss: %.3f (%.3f, %.3f) \n' %
                          (curr_loss, curr_l1, curr_l2))
                log.write('Minibatch error: %.1f%%. \n' %
                          (error_rate(curr_preds, batch_labels)))
            sys.stdout.flush()
        step += 1
        if step == num_steps:
            break
def tf_train(x, y, model, X_train, Y_train, generator, model_name,
             x_advs=None, epochs=0):
    old_vars = set(tf.global_variables())
    train_size = Y_train.shape[0]

    idx = tf.placeholder(dtype=tf.int32)
    x_adv_chosen = tf.stack(x_advs)[idx]

    logits = model(x)
    preds = K.softmax(logits)
    l1 = gen_adv_loss(logits, y, mean=True)

    logits_adv = model(x_adv_chosen)
    preds_adv = K.softmax(logits_adv)
    l2 = gen_adv_loss(logits_adv, y, mean=True)

    # domain-adaptation losses between clean and adversarial logits
    coral_loss = get_coral_loss(logits, logits_adv)
    mmd_loss = get_mmd_loss(logits, logits_adv)
    alpha_item = 0.1
    margin_loss, _, centers_update_up = get_margin_loss(
        tf.concat([tf.argmax(tf.cast(y, tf.int32), axis=-1),
                   tf.argmax(tf.cast(y, tf.int32), axis=-1)], axis=0),
        tf.concat([logits, logits_adv], axis=0),
        FLAGS.NUM_CLASSES, alpha=alpha_item)

    lambda_item = 1. / 3
    loss = (l1 + l2) + (coral_loss + mmd_loss + margin_loss) * lambda_item

    # add regularization loss for layer weights if it exists
    if len(model.losses) != 0:
        loss = loss + tf.add_n(model.losses)

    with tf.control_dependencies([centers_update_up]):
        optimizer = tf.train.AdamOptimizer(0.001).minimize(loss)

    # Run all the initializers to prepare the trainable parameters.
    K.get_session().run(tf.initialize_variables(
        set(tf.global_variables()) - old_vars))
    start_time = time.time()
    print('Initialized!')

    # Loop through training steps.
    num_steps = int(FLAGS.NUM_EPOCHS * train_size + FLAGS.BATCH_SIZE - 1) // FLAGS.BATCH_SIZE
    step = 0
    for (batch_data, batch_labels) \
            in generator.flow(X_train, Y_train, batch_size=FLAGS.BATCH_SIZE):
        if len(batch_data) < FLAGS.BATCH_SIZE:
            k = FLAGS.BATCH_SIZE - len(batch_data)
            batch_data = np.concatenate([batch_data, X_train[0:k]])
            batch_labels = np.concatenate([batch_labels, Y_train[0:k]])

        feed_dict = {x: batch_data, y: batch_labels, K.learning_phase(): 1}

        # choose source of adversarial examples at random
        # (for ensemble adversarial training with domain adaptation)
        feed_dict[idx] = np.random.randint(len(x_advs))

        # Run the graph
        _, curr_loss, curr_l1, curr_l2, curr_mmd, curr_coral, curr_margin_loss, \
            curr_preds, curr_preds_adv, _ = \
            K.get_session().run([optimizer, loss, l1, l2, mmd_loss, coral_loss,
                                 margin_loss, preds, preds_adv] + [model.updates],
                                feed_dict=feed_dict)

        if step % EVAL_FREQUENCY == 0:
            elapsed_time = time.time() - start_time
            start_time = time.time()
            with open(model_name + '_log.txt', 'a') as log:
                log.write('Step %d (epoch %.2f), %.2f s \n' %
                          (step, float(step) * FLAGS.BATCH_SIZE / train_size, elapsed_time))
                log.write('Minibatch loss: %.3f (%.3f, %.3f, %.3f, %.3f, %.3f) \n' %
                          (curr_loss, curr_l1, curr_l2, curr_mmd, curr_coral, curr_margin_loss))
                log.write('Minibatch error: %.1f%%, %.1f%% \n' %
                          (error_rate(curr_preds, batch_labels),
                           error_rate(curr_preds_adv, batch_labels)))
            sys.stdout.flush()
        step += 1
        if step == num_steps:
            break
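# The domain-adaptation variant above imports get_coral_loss,
# get_mmd_loss, and get_margin_loss from elsewhere in the repository.
# For reference, minimal sketches of the first two, following the
# standard CORAL (Sun & Saenko, 2016) and linear-kernel MMD definitions;
# the repository's exact normalizations, and its margin/center loss,
# may differ.
def get_coral_loss(source, target):
    # align second-order statistics (covariances) of clean vs.
    # adversarial logits
    d = tf.cast(tf.shape(source)[1], tf.float32)

    def cov(f):
        f = f - tf.reduce_mean(f, axis=0, keepdims=True)
        n = tf.cast(tf.shape(f)[0], tf.float32)
        return tf.matmul(f, f, transpose_a=True) / (n - 1.0)

    return tf.reduce_sum(tf.square(cov(source) - cov(target))) / (4.0 * d * d)

def get_mmd_loss(source, target):
    # linear-kernel MMD: squared distance between the batch means
    diff = tf.reduce_mean(source, axis=0) - tf.reduce_mean(target, axis=0)
    return tf.reduce_sum(tf.square(diff))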