def train(train_loader, model, criterion, optimizer, lr_init=None, lr_now=None, glob_step=None, lr_decay=None, gamma=None, max_norm=True): losses = utils.AverageMeter() model.train() start = time.time() batch_time = 0 bar = pBar.Bar('>>>', fill='>', max=len(train_loader)) for i, (inps, tars) in enumerate(train_loader): glob_step += 1 if glob_step % lr_decay == 0 or glob_step == 1: lr_now = utils.lr_decay(optimizer, glob_step, lr_init, lr_decay, gamma) inputs = Variable(inps.cuda()) targets = Variable(tars.cuda(async=True)) outputs = model(inputs) # calculate loss optimizer.zero_grad() loss = criterion(outputs, targets) losses.update(loss.item(), inputs.size(0)) loss.backward() if max_norm: nn.utils.clip_grad_norm(model.parameters(), max_norm=1) optimizer.step() # update summary if (i + 1) % 100 == 0: batch_time = time.time() - start start = time.time() bar.suffix = '({batch}/{size}) | batch: {batchtime:.4}ms | Total: {ttl} | ETA: {eta:} | loss: {loss:.4f}' \ .format(batch=i + 1, size=len(train_loader), batchtime=batch_time * 10.0, ttl=bar.elapsed_td, eta=bar.eta_td, loss=losses.avg) bar.next() bar.finish() return glob_step, lr_now, losses.avg
def main(unused_args): assert len(unused_args) == 1, unused_args setup_experiment(logging, FLAGS, "critic_model") if FLAGS.validation: mnist_ds = mnist.read_data_sets(FLAGS.data_dir, dtype=tf.float32, reshape=False, validation_size=0) val_ds = mnist_ds.test else: mnist_ds = mnist.read_data_sets(FLAGS.data_dir, dtype=tf.float32, reshape=False, validation_size=FLAGS.validation_size) val_ds = mnist_ds.validation train_ds = mnist_ds.train val_ds = mnist_ds.validation test_ds = mnist_ds.test num_classes = FLAGS.num_classes img_shape = [None, 1, 28, 28] X = tf.placeholder(tf.float32, shape=img_shape, name='X') # placeholder to avoid recomputation of adversarial images for critic X_hat_h = tf.placeholder(tf.float32, shape=img_shape, name='X_hat') y = tf.placeholder(tf.int32, shape=[None], name='y') y_onehot = tf.one_hot(y, num_classes) reduce_ind = list(range(1, X.get_shape().ndims)) # test/validation inputs X_v = tf.placeholder(tf.float32, shape=img_shape, name='X_v') y_v = tf.placeholder(tf.int32, shape=[None], name='y_v') y_v_onehot = tf.one_hot(y_v, num_classes) # classifier model model = create_model(FLAGS, name=FLAGS.model_name) def test_model(x, **kwargs): return model(x, train=False, **kwargs) # generator def generator(inputs, confidence, targets=None): return high_confidence_attack_unrolled( lambda x: model(x)['logits'], inputs, targets=targets, confidence=confidence, max_iter=FLAGS.attack_iter, over_shoot=FLAGS.attack_overshoot, attack_random=FLAGS.attack_random, attack_uniform=FLAGS.attack_uniform, attack_label_smoothing=FLAGS.attack_label_smoothing) def test_generator(inputs, confidence, targets=None): return high_confidence_attack(lambda x: test_model(x)['logits'], inputs, targets=targets, confidence=confidence, max_iter=FLAGS.df_iter, over_shoot=FLAGS.df_overshoot, random=FLAGS.attack_random, uniform=FLAGS.attack_uniform, clip_dist=FLAGS.df_clip) # discriminator critic = create_model(FLAGS, prefix='critic_', name='critic') # classifier outputs outs_x = model(X) outs_x_v = test_model(X_v) params = tf.trainable_variables() model_weights = [param for param in params if "weights" in param.name] vars = tf.model_variables() target_conf_v = [None] if FLAGS.attack_confidence == "same": # set the target confidence to the confidence of the original prediction target_confidence = outs_x['conf'] target_conf_v[0] = target_confidence elif FLAGS.attack_confidence == "class_running_mean": # set the target confidence to the mean confidence of the specific target # use running mean estimate class_conf_mean = tf.Variable(np.ones(num_classes, dtype=np.float32)) batch_conf_mean = tf.unsorted_segment_mean(outs_x['conf'], outs_x['pred'], num_classes) # if batch does not contain predictions for the specific target # (zeroes), replace zeroes with stored class mean (previous batch) batch_conf_mean = tf.where(tf.not_equal(batch_conf_mean, 0), batch_conf_mean, class_conf_mean) # update class confidence mean class_conf_mean = assign_moving_average(class_conf_mean, batch_conf_mean, 0.5) # init class confidence during pre-training tf.add_to_collection("PREINIT_OPS", class_conf_mean) def target_confidence(targets_onehot): targets = tf.argmax(targets_onehot, axis=1) check_conf = tf.Assert( tf.reduce_all(tf.not_equal(class_conf_mean, 0)), [class_conf_mean]) with tf.control_dependencies([check_conf]): t = tf.gather(class_conf_mean, targets) target_conf_v[0] = t return tf.stop_gradient(t) else: target_confidence = float(FLAGS.attack_confidence) target_conf_v[0] = target_confidence X_hat = generator(X, target_confidence) outs_x_hat = model(X_hat) # select examples for which attack succeeded (changed the prediction) X_hat_filter = tf.not_equal(outs_x['pred'], outs_x_hat['pred']) X_hat_f = tf.boolean_mask(X_hat, X_hat_filter) X_f = tf.boolean_mask(X, X_hat_filter) outs_x_f = model(X_f) outs_x_hat_f = model(X_hat_f) X_hatd = tf.stop_gradient(X_hat) X_rec = generator(X_hatd, outs_x['conf'], outs_x['pred']) X_rec_f = tf.boolean_mask(X_rec, X_hat_filter) # validation/test adversarial examples X_v_hat = test_generator(X_v, FLAGS.val_attack_confidence) X_v_hatd = tf.stop_gradient(X_v_hat) X_v_rec = test_generator(X_v_hatd, outs_x_v['conf'], targets=outs_x_v['pred']) X_v_hat_df = deepfool(lambda x: test_model(x)['logits'], X_v, y_v, max_iter=FLAGS.df_iter, clip_dist=FLAGS.df_clip) X_v_hat_df_all = deepfool(lambda x: test_model(x)['logits'], X_v, max_iter=FLAGS.df_iter, clip_dist=FLAGS.df_clip) y_hat = outs_x['pred'] y_adv = outs_x_hat['pred'] y_adv_f = outs_x_hat_f['pred'] tf.summary.histogram('y_data', y, collections=["model_summaries"]) tf.summary.histogram('y_hat', y_hat, collections=["model_summaries"]) tf.summary.histogram('y_adv', y_adv, collections=["model_summaries"]) # critic outputs critic_outs_x = critic(X) critic_outs_x_hat = critic(X_hat_f) critic_params = list(set(tf.trainable_variables()) - set(params)) critic_vars = list(set(tf.trainable_variables()) - set(vars)) # binary logits for a specific target logits_data = critic_outs_x['logits'] logits_data_flt = tf.reshape(logits_data, (-1, )) z_data = tf.gather(logits_data_flt, tf.range(tf.shape(X)[0]) * num_classes + y) logits_adv = critic_outs_x_hat['logits'] logits_adv_flt = tf.reshape(logits_adv, (-1, )) z_adv = tf.gather(logits_adv_flt, tf.range(tf.shape(X_hat_f)[0]) * num_classes + y_adv_f) # classifier/generator losses nll = tf.reduce_mean( tf.losses.softmax_cross_entropy(y_onehot, outs_x['logits'])) nll_v = tf.reduce_mean( tf.losses.softmax_cross_entropy(y_v_onehot, outs_x_v['logits'])) # gan losses gan = tf.losses.sigmoid_cross_entropy(tf.ones_like(z_adv), z_adv) rec_l1 = tf.reduce_mean( tf.reduce_sum(tf.abs(X_f - X_rec_f), axis=reduce_ind)) rec_l2 = tf.reduce_mean(tf.reduce_sum((X_f - X_rec_f)**2, axis=reduce_ind)) weight_decay = slim.apply_regularization(slim.l2_regularizer(1.0), model_weights[:-1]) pretrain_loss = nll + 5e-6 * weight_decay loss = nll + FLAGS.lmbd * gan if FLAGS.lmbd_rec_l1 > 0: loss += FLAGS.lmbd_rec_l1 * rec_l1 if FLAGS.lmbd_rec_l2 > 0: loss += FLAGS.lmbd_rec_l2 * rec_l2 if FLAGS.weight_decay > 0: loss += FLAGS.weight_decay * weight_decay # critic loss critic_gan_data = tf.losses.sigmoid_cross_entropy(tf.ones_like(z_data), z_data) # use placeholder for X_hat to avoid recomputation of adversarial noise y_adv_h = model(X_hat_h)['pred'] logits_adv_h = critic(X_hat_h)['logits'] logits_adv_flt_h = tf.reshape(logits_adv_h, (-1, )) z_adv_h = tf.gather(logits_adv_flt_h, tf.range(tf.shape(X_hat_h)[0]) * num_classes + y_adv_h) critic_gan_adv = tf.losses.sigmoid_cross_entropy(tf.zeros_like(z_adv_h), z_adv_h) critic_gan = critic_gan_data + critic_gan_adv # Gulrajani discriminator regularizer (we do not interpolate) critic_grad_data = tf.gradients(z_data, X)[0] critic_grad_adv = tf.gradients(z_adv_h, X_hat_h)[0] critic_grad_penalty = norm_penalty(critic_grad_adv) + norm_penalty( critic_grad_data) critic_loss = critic_gan + FLAGS.lmbd_grad * critic_grad_penalty # classifier model_metrics err = 1 - slim.metrics.accuracy(outs_x['pred'], y) conf = tf.reduce_mean(outs_x['conf']) err_hat = 1 - slim.metrics.accuracy( test_model(X_hat)['pred'], outs_x['pred']) err_hat_f = 1 - slim.metrics.accuracy( test_model(X_hat_f)['pred'], outs_x_f['pred']) err_rec = 1 - slim.metrics.accuracy( test_model(X_rec)['pred'], outs_x['pred']) conf_hat = tf.reduce_mean(test_model(X_hat)['conf']) conf_hat_f = tf.reduce_mean(test_model(X_hat_f)['conf']) conf_rec = tf.reduce_mean(test_model(X_rec)['conf']) err_v = 1 - slim.metrics.accuracy(outs_x_v['pred'], y_v) conf_v_hat = tf.reduce_mean(test_model(X_v_hat)['conf']) l2_hat = tf.sqrt(tf.reduce_sum((X_f - X_hat_f)**2, axis=reduce_ind)) tf.summary.histogram('l2_hat', l2_hat, collections=["model_summaries"]) # critic model_metrics critic_err_data = 1 - binary_accuracy( z_data, tf.ones(tf.shape(z_data), tf.bool), 0.0) critic_err_adv = 1 - binary_accuracy( z_adv, tf.zeros(tf.shape(z_adv), tf.bool), 0.0) # validation model_metrics err_df = 1 - slim.metrics.accuracy(test_model(X_v_hat_df)['pred'], y_v) err_df_all = 1 - slim.metrics.accuracy( test_model(X_v_hat_df_all)['pred'], outs_x_v['pred']) l2_v_hat = tf.sqrt(tf.reduce_sum((X_v - X_v_hat)**2, axis=reduce_ind)) l2_v_rec = tf.sqrt(tf.reduce_sum((X_v - X_v_rec)**2, axis=reduce_ind)) l1_v_rec = tf.reduce_sum(tf.abs(X_v - X_v_rec), axis=reduce_ind) l2_df = tf.sqrt(tf.reduce_sum((X_v - X_v_hat_df)**2, axis=reduce_ind)) l2_df_norm = l2_df / tf.sqrt(tf.reduce_sum(X_v**2, axis=reduce_ind)) l2_df_all = tf.sqrt( tf.reduce_sum((X_v - X_v_hat_df_all)**2, axis=reduce_ind)) l2_df_norm_all = l2_df_all / tf.sqrt(tf.reduce_sum(X_v**2, axis=reduce_ind)) tf.summary.histogram('l2_df', l2_df, collections=["adv_summaries"]) tf.summary.histogram('l2_df_norm', l2_df_norm, collections=["adv_summaries"]) # model_metrics pretrain_model_metrics = OrderedDict([('nll', nll), ('weight_decay', weight_decay), ('err', err)]) model_metrics = OrderedDict([('loss', loss), ('nll', nll), ('l2_hat', tf.reduce_mean(l2_hat)), ('gan', gan), ('rec_l1', rec_l1), ('rec_l2', rec_l2), ('weight_decay', weight_decay), ('err', err), ('conf', conf), ('err_hat', err_hat), ('err_hat_f', err_hat_f), ('conf_t', tf.reduce_mean(target_conf_v[0])), ('conf_hat', conf_hat), ('conf_hat_f', conf_hat_f), ('err_rec', err_rec), ('conf_rec', conf_rec)]) critic_metrics = OrderedDict([('c_loss', critic_loss), ('c_gan', critic_gan), ('c_gan_data', critic_gan_data), ('c_gan_adv', critic_gan_adv), ('c_grad_norm', critic_grad_penalty), ('c_err_adv', critic_err_adv), ('c_err_data', critic_err_data)]) val_metrics = OrderedDict([('nll', nll_v), ('err', err_v)]) adv_metrics = OrderedDict([('l2_df', tf.reduce_mean(l2_df)), ('l2_df_norm', tf.reduce_mean(l2_df_norm)), ('l2_df_all', tf.reduce_mean(l2_df_all)), ('l2_df_all_norm', tf.reduce_mean(l2_df_norm_all)), ('l2_hat', tf.reduce_mean(l2_v_hat)), ('conf_hat', conf_v_hat), ('l1_rec', tf.reduce_mean(l1_v_rec)), ('l2_rec', tf.reduce_mean(l2_v_rec)), ('err_df', err_df), ('err_df_all', err_df_all)]) pretrain_metric_mean, pretrain_metric_upd = register_metrics( pretrain_model_metrics, collections="pretrain_model_summaries") metric_mean, metric_upd = register_metrics(model_metrics, collections="model_summaries") critic_metric_mean, critic_metric_upd = register_metrics( critic_metrics, collections="critic_summaries") val_metric_mean, val_metric_upd = register_metrics( val_metrics, prefix="val_", collections="val_summaries") adv_metric_mean, adv_metric_upd = register_metrics( adv_metrics, collections="adv_summaries") metrics_reset = tf.variables_initializer(tf.local_variables()) # training ops lr = tf.Variable(FLAGS.lr, trainable=False) critic_lr = tf.Variable(FLAGS.critic_lr, trainable=False) tf.summary.scalar('lr', lr, collections=["model_summaries"]) tf.summary.scalar('critic_lr', critic_lr, collections=["critic_summaries"]) optimizer = tf.train.AdamOptimizer(learning_rate=lr, beta1=0.5) preinit_ops = tf.get_collection("PREINIT_OPS") with tf.control_dependencies(preinit_ops): pretrain_solver = optimizer.minimize(pretrain_loss, var_list=params) solver = optimizer.minimize(loss, var_list=params) critic_solver = (tf.train.AdamOptimizer( learning_rate=critic_lr, beta1=0.5).minimize(critic_loss, var_list=critic_params)) # train summary_images, summary_labels = select_balanced_subset( train_ds.images, train_ds.labels, num_classes, num_classes) summary_images = summary_images.transpose((0, 3, 1, 2)) save_path = os.path.join(FLAGS.samples_dir, 'orig.png') save_images(summary_images, save_path) if FLAGS.gpu_memory < 1.0: gpu_options = tf.GPUOptions( per_process_gpu_memory_fraction=FLAGS.gpu_memory) config = tf.ConfigProto(gpu_options=gpu_options) else: config = None with tf.Session(config=config) as sess: try: # summaries summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph) summaries = tf.summary.merge_all("model_summaries") critic_summaries = tf.summary.merge_all("critic_summaries") val_summaries = tf.summary.merge_all("val_summaries") adv_summaries = tf.summary.merge_all("adv_summaries") # initialization tf.local_variables_initializer().run() tf.global_variables_initializer().run() # pretrain model if FLAGS.pretrain_niter > 0: logging.info("Model pretraining") for epoch in range(1, FLAGS.pretrain_niter + 1): train_iterator = batch_iterator(train_ds.images, train_ds.labels, FLAGS.batch_size, shuffle=True) sess.run(metrics_reset) start_time = time.time() for ind, (images, labels) in enumerate(train_iterator): sess.run([pretrain_solver, pretrain_metric_upd], feed_dict={ X: images, y: labels }) str_bfr = six.StringIO() str_bfr.write("Pretrain epoch [{}, {:.2f}s]:".format( epoch, time.time() - start_time)) print_results_str(str_bfr, pretrain_model_metrics.keys(), sess.run(pretrain_metric_mean)) print_results_str(str_bfr, critic_metrics.keys(), sess.run(critic_metric_mean)) logging.info(str_bfr.getvalue()[:-1]) # training for epoch in range(1, FLAGS.niter + 1): train_iterator = batch_iterator(train_ds.images, train_ds.labels, FLAGS.batch_size, shuffle=True) sess.run(metrics_reset) start_time = time.time() for ind, (images, labels) in enumerate(train_iterator): batch_index = (epoch - 1) * (train_ds.images.shape[0] // FLAGS.batch_size) + ind # train critic for several steps X_hat_np = sess.run(X_hat, feed_dict={X: images}) for _ in range(FLAGS.critic_steps - 1): sess.run([critic_solver], feed_dict={ X: images, y: labels, X_hat_h: X_hat_np }) else: summary = sess.run([ critic_solver, critic_metric_upd, critic_summaries ], feed_dict={ X: images, y: labels, X_hat_h: X_hat_np })[-1] summary_writer.add_summary(summary, batch_index) # train model summary = sess.run([solver, metric_upd, summaries], feed_dict={ X: images, y: labels })[-1] summary_writer.add_summary(summary, batch_index) str_bfr = six.StringIO() str_bfr.write("Train epoch [{}, {:.2f}s]:".format( epoch, time.time() - start_time)) print_results_str(str_bfr, model_metrics.keys(), sess.run(metric_mean)) print_results_str(str_bfr, critic_metrics.keys(), sess.run(critic_metric_mean)) logging.info(str_bfr.getvalue()[:-1]) val_iterator = batch_iterator(val_ds.images, val_ds.labels, 100, shuffle=False) for images, labels in val_iterator: summary = sess.run([val_metric_upd, val_summaries], feed_dict={ X_v: images, y_v: labels })[-1] summary_writer.add_summary(summary, epoch) str_bfr = six.StringIO() str_bfr.write("Valid epoch [{}]:".format(epoch)) print_results_str(str_bfr, val_metrics.keys(), sess.run(val_metric_mean)) logging.info(str_bfr.getvalue()[:-1]) # learning rate decay update_lr = lr_decay(lr, epoch) if update_lr is not None: sess.run(update_lr) logging.debug( "learning rate was updated to: {:.10f}".format( lr.eval())) critic_update_lr = lr_decay(critic_lr, epoch, prefix='critic_') if critic_update_lr is not None: sess.run(critic_update_lr) logging.debug( "critic learning rate was updated to: {:.10f}".format( critic_lr.eval())) if epoch % FLAGS.summary_frequency == 0: samples_hat, samples_rec, samples_df, summary = sess.run( [ X_v_hat, X_v_rec, X_v_hat_df, adv_summaries, adv_metric_upd ], feed_dict={ X_v: summary_images, y_v: summary_labels })[:-1] summary_writer.add_summary(summary, epoch) save_path = os.path.join(FLAGS.samples_dir, 'epoch_orig-%d.png' % epoch) save_images(summary_images, save_path) save_path = os.path.join(FLAGS.samples_dir, 'epoch-%d.png' % epoch) save_images(samples_hat, save_path) save_path = os.path.join(FLAGS.samples_dir, 'epoch_rec-%d.png' % epoch) save_images(samples_rec, save_path) save_path = os.path.join(FLAGS.samples_dir, 'epoch_df-%d.png' % epoch) save_images(samples_df, save_path) str_bfr = six.StringIO() str_bfr.write("Summary epoch [{}]:".format(epoch)) print_results_str(str_bfr, adv_metrics.keys(), sess.run(adv_metric_mean)) logging.info(str_bfr.getvalue()[:-1]) if FLAGS.checkpoint_frequency != -1 and epoch % FLAGS.checkpoint_frequency == 0: save_checkpoint(sess, vars, epoch=epoch) save_checkpoint(sess, critic_vars, name="critic_model", epoch=epoch) except KeyboardInterrupt: logging.debug("Keyboard interrupt. Stopping training...") except NanError as e: logging.info(e) finally: sess.run(metrics_reset) save_checkpoint(sess, vars) save_checkpoint(sess, critic_vars, name="critic_model") # final accuracy test_iterator = batch_iterator(test_ds.images, test_ds.labels, 100, shuffle=False) for images, labels in test_iterator: sess.run([val_metric_upd], feed_dict={X_v: images, y_v: labels}) str_bfr = six.StringIO() str_bfr.write("Final epoch [{}]:".format(epoch)) for metric_name, metric_value in zip(val_metrics.keys(), sess.run(val_metric_mean)): str_bfr.write(" {}: {:.6f},".format(metric_name, metric_value)) logging.info(str_bfr.getvalue()[:-1])
parser.add_argument('--beta1', type=float, default=0.9) parser.add_argument('--beta2', type=float, default=0.999) parser.add_argument('--decay_step', type=int, default=2e+5) parser.add_argument('--decay_rate', type=int, default=2) flags = parser.parse_args() dataset = Dataset(flags) srram = SRRAM(scale_factor=flags.scale_factor) save_path = utils.build_save_path(flags) cbs = [ TensorBoard(log_dir=save_path, histogram_freq=1, write_graph=True), LearningRateScheduler( lambda epoch: utils.lr_decay(epoch, init_value=flags.lr, decay_step=flags.decay_step, decay_rate=flags.decay_rate)), EarlyStopping(monitor='val_loss', patience=flags.patience, verbose=0, mode='auto'), ModelCheckpoint(save_path + '/model.h5', save_best_only=True) ] srram.model.compile(optimizer=Adam(lr=flags.lr, epsilon=1e-8), loss='mae') srram.model.fit(dataset.train_set, epochs=flags.epochs, steps_per_epoch=dataset.train_steps_per_epoch, validation_data=dataset.val_set, validation_steps=dataset.val_steps_per_epoch, callbacks=cbs)
optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9) train_begin = time.time() print('train begin', '-' * 50) print() print() writer = SummaryWriter(os.path.join(args.output_path, 'log')) batch_num = -1 best_f1 = -1 early_stop = 0 for epoch in range(args.epochs): epoch_begin = time.time() print('train {}/{} epoch'.format(epoch + 1, args.epochs)) optimizer = lr_decay(optimizer, epoch, 0.05, args.lr) batch_num = train_model(train_dataloader, model, optimizer, batch_num, writer, use_gpu) new_f1 = evaluate(dev_dataloader, model, word_vocab, label_vocab, args.output_path, prefix='dev', use_gpu=use_gpu) print('f1 is {} at {}th epoch on dev set'.format(new_f1, epoch + 1)) if new_f1 > best_f1: best_f1 = new_f1 print('new best f1 on dev set:', best_f1) early_stop = 0 torch.save(model.state_dict(), model_name)
def train(self, plot_period: int=5): """ define loss-, optimzer- and scheduler-functions """ criterion_disc = nn.BCELoss() criterion_gen = nn.BCELoss() optimizer_disc = torch.optim.Adam(self.discriminator.parameters(), lr=self.disc_lr, betas=(0.5, 0.999)) optimizer_gen = torch.optim.Adam(self.generator.parameters(), lr=self.gen_lr, betas=(0.5, 0.999)) """ create benchmark """ self.benchmark_logger.create_entry(self.benchmark_id, optimizer_disc, criterion_disc, self.epochs, self.batch_size, self.disc_lr, self.gen_lr, self.disc_lr_decay, self.gen_lr_decay, self.lr_decay_period, self.gaussian_noise_range) # initial noise rate noise_rate = self.gaussian_noise_range[0] # total loss log loss_disc_history, loss_disc_real_history, loss_disc_fake_history = [], [], [] loss_gen_history = [] for epoch in range(self.epochs): # epoch loss log epoch_loss_disc, epoch_loss_disc_real, epoch_loss_disc_fake = [], [], [] epoch_loss_gen = [] for iteration in tqdm(range(self.iterations), ncols=120, desc="batch-iterations"): images_real, targets_real, images_fake, targets_fake = self._create_batch(iteration) """ train discriminator """ # update every third iteration to make the generator stronger self.discriminator.zero_grad() # train with real images predictions_real = self.discriminator.train()(images_real, gaussian_noise_rate=noise_rate) loss_real = criterion_disc(predictions_real, targets_real) loss_real.backward() # train with fake images predictions_fake = self.discriminator.train()(images_fake, gaussian_noise_rate=noise_rate) loss_fake = criterion_disc(predictions_fake, targets_fake) loss_fake.backward(retain_graph=True) if iteration % 1 == 0: optimizer_disc.step() # save losses epoch_loss_disc.append(loss_real.item() + loss_fake.item()) epoch_loss_disc_real.append(loss_real.item()) epoch_loss_disc_fake.append(loss_fake.item()) """ train generator """ self.generator.zero_grad() # train discriminator on fake images with target "real image" ([1, 0]) predictions_fake = self.discriminator.train()(images_fake) loss_gen = criterion_gen(predictions_fake, targets_real) loss_gen.backward() optimizer_gen.step() epoch_loss_gen.append(loss_gen.item()) """ linear gaussian noise decay for disc. inputs """ noise_rate = np.linspace(self.gaussian_noise_range[0], self.gaussian_noise_range[1], self.epochs)[epoch] """ save models """ save_models(self.generator, self.discriminator, save_to=(self.models_path), current_epoch=epoch, period=5) """ calculate average losses of the epoch """ current_loss_disc, current_loss_disc_real, current_loss_disc_fake = round(np.mean(epoch_loss_disc), 4), round(np.mean(epoch_loss_disc_real), 4), round(np.mean(epoch_loss_disc_fake), 4) current_loss_gen = round(np.mean(epoch_loss_gen), 4) """ get learning-rate """ current_disc_lr = round(optimizer_disc.param_groups[0]["lr"], 7) current_gen_lr = round(optimizer_gen.param_groups[0]["lr"], 7) """ learning-rate decay (set 'p' to 'False' for not doing lr-decay) """ do = False if do: optimizer_disc.param_groups[0]["lr"] = lr_decay(lr=optimizer_disc.param_groups[0]["lr"], epoch=epoch, decay_rate=self.disc_lr_decay, period=self.lr_decay_period) optimizer_gen.param_groups[0]["lr"] = lr_decay(lr=optimizer_gen.param_groups[0]["lr"], epoch=epoch, decay_rate=self.gen_lr_decay, period=self.lr_decay_period) """ save losses for plotting """ loss_disc_history.append(current_loss_disc); loss_disc_real_history.append(current_loss_disc_real); loss_disc_fake_history.append(current_loss_disc_fake) loss_gen_history.append(current_loss_gen) """ print trainings progress """ print_progress(epoch, self.epochs, current_loss_disc, current_loss_disc_real, current_loss_disc_fake, current_loss_gen, current_disc_lr, current_gen_lr) """ plot generated images """ if plot_period is not None: show_generated(self.generator, view_seconds=1, current_epoch=epoch, period=plot_period, save_to=(self.generated_images_path + "/" + str(epoch + 1) + ".png")) """ plot loss history """ plot_loss_history(loss_disc_history, loss_disc_real_history, loss_disc_fake_history, loss_gen_history, save_to=(self.plots_path + "/" + self.benchmark_id + ".png"))
def train(self): self.early_stop = False self.train_hist = {} self.train_hist['train_loss'] = [] self.train_hist['train_accuracy'] = [] self.train_hist['per_epoch_time'] = [] self.train_hist['total_time'] = [] self.record_file = os.path.join(self.result_dir,"records_"+str(self.img_size)+".txt") # read data for training stage if self.dataset == 'wood': self.train_data_loader = utilsLoadData.load_all( data_dir='../data/wood', batch_size=self.batch_size, img_size=self.img_size) elif self.dataset == 'DAGM_8': self.train_data_loader = utilsLoadData.load_all( data_dir='../data/DAGM_8', batch_size=self.batch_size, img_size=self.img_size) elif self.dataset == 'DAGM_10': self.train_data_loader = utilsLoadData.load_all( data_dir='../data/DAGM_10', batch_size=self.batch_size, img_size=self.img_size) elif self.dataset == 'middle_white': self.train_data_loader = utilsLoadData.load_two(self.batch_size, self.img_size, '../data/middle_white/OK_train_128', '../data/middle_white/NG_train_128') elif self.dataset == 'flower_chip': # because flower_chip flaw is polycrystaline, so OK samples are different than other monocrystaline self.train_data_loader = utilsLoadData.load_two(self.batch_size, self.img_size, '../generative_models/pytorch/data/flower_chip/OK/train', '../generative_models/pytorch/data/flower_chip/NG/train') elif self.dataset == 'gas_leak_dirt': self.train_data_loader = utilsLoadData.load_two(self.batch_size, self.img_size, '../generative_models/pytorch/data/solar_128', '../generative_models/pytorch/data/gas_leak_dirt/train') elif self.dataset == 'intra_chip_diff': self.train_data_loader = utilsLoadData.load_two(self.batch_size, self.img_size, '../generative_models/pytorch/data/solar_128', '../generative_models/pytorch/data/intra_chip_diff') else: raise Exception("[!] No dataset named %s" %self.dataset) # start training self.network.train() print(bcolors.OKBLUE+"Training start! total: %d data" %self.train_data_loader.dataset.__len__()+bcolors.ENDC) with open(self.record_file, 'w') as f: f.write("learning rate: %f\n" %self.lr) start_time = time.time() for epoch in range(self.epoch): # initialize some variables to calculate confusion matrix neg = torch.zeros(self.batch_size, 1).type(torch.LongTensor).cuda() pos = torch.ones(self.batch_size, 1).type(torch.LongTensor).cuda() corrects = 0 self.train_tp, self.train_fn, self.train_fp, self.train_tn = 0, 0, 0, 0 utils.lr_decay(self.optimizer, (epoch+1), self.decay, mode='lambda') # start epoch epoch_start_time = time.time() for iter, (x_, y_) in enumerate(self.train_data_loader): if iter == (self.train_data_loader.dataset.__len__()//self.batch_size): neg = torch.zeros(len(x_[:]), 1).type(torch.LongTensor).cuda() pos = torch.ones(len(x_[:]), 1).type(torch.LongTensor).cuda() x_, y_ = Variable(x_.cuda()), Variable(y_.type(torch.FloatTensor).cuda()) # update betwork self.optimizer.zero_grad() outputs = self.network(x_) _, preds = torch.max(outputs.data, 1, keepdim=True) # find the max pos _, gts = torch.max(y_.data, 1, keepdim=True) # find the max pos loss = self.BCE_loss(outputs, y_) loss.backward() self.optimizer.step() # statistics corrects += torch.sum(preds == gts) self.train_tp += torch.sum(torch.gt((preds==neg),(gts==pos))) self.train_fn += torch.sum(torch.gt(preds,gts)) self.train_tn += torch.sum(torch.lt((preds==neg),(gts==pos))) self.train_fp += torch.sum(torch.lt(preds,gts)) if ((iter+1)%100 == 0): print("Epoch: [%2d] [%4d/%4d] train loss: %.8f" %((epoch + 1), (iter + 1), self.train_data_loader.dataset.__len__()//self.batch_size, loss.data[0])) self.train_accuracy = corrects / self.train_data_loader.dataset.__len__() self.train_hist['train_loss'].append(loss.data[0]) self.train_hist['train_accuracy'].append(self.train_accuracy) print(bcolors.OKGREEN+bcolors.BOLD+"train accuracy: %.4f" %(self.train_accuracy)) print("--------------------------------------") print("| | positive | negative |") print("| positive | %7d | %7d |" %(self.train_tp, self.train_fp)) print("| negative | %7d | %7d |" %(self.train_fn, self.train_tn)) print("--------------------------------------"+bcolors.ENDC) with open(self.record_file, 'a') as f: f.write("Epoch: %d\n" %(epoch+1)) f.write("Training Accuracy: %.4f\n" %(self.train_accuracy)) f.write("--------------------------------------\n") f.write("| | positive | negative |\n") f.write("| positive | %7d | %7d |\n" %(self.train_tp, self.train_fp)) f.write("| negative | %7d | %7d |\n" %(self.train_fn, self.train_tn)) f.write("--------------------------------------\n") # compute testing accuracy and print testing information if self.dataset == 'wod': self.predict_test() with open(self.record_file, 'a') as f: f.write("Testing Accuracy: %.4f\n" %(self.test_accuracy)) f.write("--------------------------------------\n") f.write("| | positive | negative |\n") f.write("| positive | %7d | %7d |\n" %(self.test_tp, self.test_fp)) f.write("| negative | %7d | %7d |\n" %(self.test_fn, self.test_tn)) f.write("--------------------------------------\n") self.train_hist['per_epoch_time'].append(time.time()-epoch_start_time) # early stopping if not self.dataset == 'wood': if (self.train_accuracy>0.996) and (self.test_accuracy>0.996): self.save() self.early_stop = True print("[!] Early stopping!") with open(self.record_file, 'a') as f: f.write("[!] Early stopping") break self.train_hist['total_time'].append(time.time()-start_time) print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %(np.mean(self.train_hist['per_epoch_time']), self.epoch, self.train_hist['total_time'][0])) print("Training finish!... saving training results") # save model utils.plot(self.train_hist, self.result_dir) if not self.early_stop: self.save()