def train_d(self, img, label):
    """Apply one gradient update to the discriminator ``self.d``.

    Args:
        img: Batch passed to ``self.d.call`` in training mode.
        label: Targets handed to ``self.loss_func`` and ``binary_accuracy``.

    Returns:
        Tuple ``(loss, accuracy)`` where ``accuracy`` is
        ``binary_accuracy(label, pred)``.
    """
    with tf.GradientTape() as d_tape:
        d_out = self.d.call(img, training=True)
        d_loss = self.loss_func(label, d_out)

    # Read the variable list only after the forward pass, as the original does.
    d_grads = d_tape.gradient(d_loss, self.d.trainable_variables)
    grads_and_vars = zip(d_grads, self.d.trainable_variables)
    self.opt.apply_gradients(grads_and_vars)

    d_acc = binary_accuracy(label, d_out)
    return d_loss, d_acc
def train(num_epoch, patience=30, verbose=False):
    """Train the module-level ``model`` with Adam and loss-based early stopping.

    Each epoch performs a single optimisation step on
    ``model(features, adj)``. The loop ends once ``patience`` epochs have
    passed since the epoch that produced the lowest training loss.

    Args:
        num_epoch: Maximum number of epochs to run.
        patience: Epochs to wait after the best loss before stopping.
        verbose: When true, print loss, accuracy and wall time per epoch.
    """
    model.train()
    adam = optim.Adam(
        model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    lowest_loss, lowest_at = float("inf"), -1

    for epoch in range(num_epoch):
        started = time.time()

        adam.zero_grad()
        logits, targets = model(features, adj)
        if args.cuda:
            targets = targets.cuda()
        step_loss = F.binary_cross_entropy_with_logits(logits, targets)
        step_acc = binary_accuracy(logits, targets)
        step_loss.backward()
        adam.step()

        loss_value = step_loss.item()
        acc_value = step_acc.item()
        if verbose:
            report = (
                'Epoch: {:04d}'.format(epoch + 1),
                'loss_train: {:.4f}'.format(loss_value),
                'acc_train: {:.4f}'.format(acc_value),
                'time: {:.4f}s'.format(time.time() - started),
            )
            print(*report)

        # Early stop: remember the best epoch, quit `patience` epochs later.
        if loss_value < lowest_loss:
            lowest_loss, lowest_at = loss_value, epoch
        if epoch - lowest_at == patience:
            break
def train_g(self, d_label):
    """Apply one gradient update to the generator ``self.g``.

    ``len(d_label)`` images are generated through ``self.call`` and scored by
    ``self.d`` in inference mode; only the generator's variables are updated.

    Args:
        d_label: Targets the discriminator output is compared against.

    Returns:
        Tuple ``(loss, generated_images, accuracy)``.
    """
    with tf.GradientTape() as g_tape:
        fake = self.call(len(d_label), training=True)
        verdict = self.d.call(fake, training=False)
        g_loss = self.loss_func(d_label, verdict)

    g_grads = g_tape.gradient(g_loss, self.g.trainable_variables)
    grads_and_vars = zip(g_grads, self.g.trainable_variables)
    self.opt.apply_gradients(grads_and_vars)

    g_acc = binary_accuracy(d_label, verdict)
    return g_loss, fake, g_acc
def train_d(self, img, img_label, label):
    """Apply one gradient update to a two-headed discriminator ``self.d``.

    ``self.d.call`` returns a real/fake prediction and a class prediction;
    their losses are added and reduced with ``tf.reduce_mean``.

    Args:
        img: Batch passed to ``self.d.call`` in training mode.
        img_label: Targets for the class head (``self.loss_class``).
        label: Targets for the real/fake head (``self.loss_bool``).

    Returns:
        Tuple ``(loss, accuracy)`` where ``accuracy`` is computed on the
        real/fake head only.
    """
    with tf.GradientTape() as d_tape:
        real_fake_out, class_out = self.d.call(img, training=True)
        real_fake_term = self.loss_bool(label, real_fake_out)
        class_term = self.loss_class(img_label, class_out)
        total = tf.reduce_mean(real_fake_term + class_term)

    d_grads = d_tape.gradient(total, self.d.trainable_variables)
    grads_and_vars = zip(d_grads, self.d.trainable_variables)
    self.opt.apply_gradients(grads_and_vars)

    return total, binary_accuracy(label, real_fake_out)
def train_g(self):
    """Apply one gradient update to the generator on ``BATCH_SIZE`` samples.

    Returns:
        Tuple ``(loss, generated_images, accuracy)`` where ``accuracy`` is
        ``binary_accuracy`` of the discriminator output against all-ones
        targets.
    """
    # All-ones targets: the generator is trained so that d labels its
    # images as real.
    real_target = tf.ones((BATCH_SIZE, 1), tf.float32)

    with tf.GradientTape() as g_tape:
        fake = self.call(BATCH_SIZE, training=True)
        verdict = self.d.call(fake, training=False)
        g_loss = self.loss_func(real_target, verdict)

    g_grads = g_tape.gradient(g_loss, self.g.trainable_variables)
    grads_and_vars = zip(g_grads, self.g.trainable_variables)
    self.opt.apply_gradients(grads_and_vars)

    g_acc = binary_accuracy(real_target, verdict)
    return g_loss, fake, g_acc
def training_step(self, batch, batch_idx):
    """Compute loss and accuracy for one training batch.

    Args:
        batch: Object exposing ``text`` (model input) and ``label`` (targets).
        batch_idx: Index of the batch; not used here.

    Returns:
        A ``pl.TrainResult`` that minimises and checkpoints on the loss and
        logs ``train_loss`` and ``train_acc`` to the progress bar.
    """
    inputs, targets = batch.text, batch.label
    scores = self.forward(inputs).squeeze(1)

    step_loss = bce_loss_with_logits(scores, targets)
    step_acc = binary_accuracy(scores, targets)

    outcome = pl.TrainResult(step_loss, checkpoint_on=step_loss)
    for metric_name, metric_value in (('train_loss', step_loss),
                                      ('train_acc', step_acc)):
        outcome.log(metric_name, metric_value, prog_bar=True)
    return outcome
def test_step(self, batch, batch_idx):
    """Compute loss and accuracy for one test batch.

    Args:
        batch: Object exposing ``text`` (model input) and ``label`` (targets).
        batch_idx: Index of the batch; not used here.

    Returns:
        A ``pl.EvalResult`` carrying ``batch_test_loss`` and
        ``batch_test_acc`` attributes.
    """
    inputs, targets = batch.text, batch.label
    scores = self.forward(inputs).squeeze(1)

    step_loss = bce_loss_with_logits(scores, targets)
    step_acc = binary_accuracy(scores, targets)

    outcome = pl.EvalResult()
    outcome.batch_test_loss = step_loss
    outcome.batch_test_acc = step_acc
    return outcome
def train_g(self, random_img_label):
    """Apply one gradient update to a label-conditioned generator.

    Images are generated from ``random_img_label`` and scored by ``self.d``
    (inference mode) on both its real/fake head and its class head.

    Args:
        random_img_label: Labels fed to ``self.call`` and used as targets for
            the class head.

    Returns:
        Tuple ``(loss, generated_images, accuracy)`` where ``accuracy`` is
        computed on the real/fake head against all-ones targets.
    """
    # All-ones targets: the generator is trained so that d labels its
    # images as real.
    real_target = tf.ones((len(random_img_label), 1), tf.float32)

    with tf.GradientTape() as g_tape:
        fake = self.call(random_img_label, training=True)
        real_fake_out, class_out = self.d.call(fake, training=False)
        real_fake_term = self.loss_bool(real_target, real_fake_out)
        class_term = self.loss_class(random_img_label, class_out)
        total = tf.reduce_mean(real_fake_term + class_term)

    g_grads = g_tape.gradient(total, self.g.trainable_variables)
    grads_and_vars = zip(g_grads, self.g.trainable_variables)
    self.opt.apply_gradients(grads_and_vars)

    return total, fake, binary_accuracy(real_target, real_fake_out)
def train(self):
    """Train ``self.model`` for ``self.config.num_epoch`` epochs.

    After every epoch the model is scored with ``self.evaluate()``; whenever
    the validation loss improves, the state dict is written to
    ``self.config.save_model``. Per-epoch timing, loss and accuracy are
    printed.
    """
    print(f'The model has {self.model.count_parameters():,} trainable parameters')
    best_valid_loss = float('inf')
    print(self.model)

    for epoch in range(self.config.num_epoch):
        self.model.train()
        loss_total, acc_total = 0, 0
        start_time = time.time()

        for batch in self.train_iter:
            # Gradients accumulate by default, so clear them for each batch.
            self.optimizer.zero_grad()

            # batch.text is unpacked as (padded token tensor, lengths), which
            # is what a Field with include_lengths=True yields.
            tokens, token_lengths = batch.text

            # Drop dim 1 of the model output so it lines up with batch.label.
            scores = self.model(tokens, token_lengths).squeeze(1)

            step_loss = self.criterion(scores, batch.label)
            step_acc = binary_accuracy(scores, batch.label)
            step_loss.backward()
            self.optimizer.step()

            # .item() extracts the Python scalar from a one-element tensor.
            loss_total += step_loss.item()
            acc_total += step_acc.item()

        batch_count = len(self.train_iter)
        train_loss = loss_total / batch_count
        train_acc = acc_total / batch_count

        valid_loss, valid_acc = self.evaluate()
        epoch_mins, epoch_secs = epoch_time(start_time, time.time())

        # Keep only the weights with the best validation loss so far.
        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(self.model.state_dict(), self.config.save_model)

        print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
        print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
        print(f'\tVal. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}%')
def train_d(self, real_fake_img, real_fake_d_label, fake_img_label, fake_style):
    """Apply one gradient update to a three-headed discriminator ``self.d``.

    The real/fake loss uses the first ``len(real_fake_d_label)`` predictions;
    the mutual-information loss uses the last ``len(real_fake_d_label)`` style
    and class predictions and is weighted by ``LAMBDA``.

    Args:
        real_fake_img: Batch passed to ``self.d.call`` in training mode.
        real_fake_d_label: Targets for the real/fake head.
        fake_img_label: Class targets for the mutual-information loss.
        fake_style: Style targets for the mutual-information loss.

    Returns:
        Tuple ``(loss, real_fake_accuracy, class_accuracy)``.
    """
    with tf.GradientTape() as d_tape:
        bool_out, style_out, class_out = self.d.call(
            real_fake_img, training=True)
        split = len(real_fake_d_label)

        head_bool = bool_out[:split]
        bool_term = self.loss_bool(real_fake_d_label, head_bool)

        tail_style = style_out[-split:]
        tail_class = class_out[-split:]
        info_term = self.loss_mutual_info(
            fake_style, tail_style, fake_img_label, tail_class)

        total = tf.reduce_mean(bool_term + LAMBDA * info_term)

    d_grads = d_tape.gradient(total, self.d.trainable_variables)
    grads_and_vars = zip(d_grads, self.d.trainable_variables)
    self.opt.apply_gradients(grads_and_vars)

    bool_acc = binary_accuracy(real_fake_d_label, head_bool)
    label_acc = class_accuracy(fake_img_label, tail_class)
    return total, bool_acc, label_acc
def evaluate(self):
    """Score ``self.model`` on ``self.valid_iter`` without tracking gradients.

    Returns:
        Tuple ``(mean_loss, mean_accuracy)``, each averaged over the number
        of batches in ``self.valid_iter``.
    """
    loss_total, acc_total = 0, 0
    self.model.eval()

    with torch.no_grad():
        for batch in self.valid_iter:
            tokens, token_lengths = batch.text
            scores = self.model(tokens, token_lengths).squeeze(1)
            loss_total += self.criterion(scores, batch.label).item()
            acc_total += binary_accuracy(scores, batch.label).item()

    mean_loss = loss_total / len(self.valid_iter)
    mean_acc = acc_total / len(self.valid_iter)
    return mean_loss, mean_acc
def evaluate(model, iterator, criterion):
    """Score ``model`` on every batch of ``iterator`` without gradients.

    Args:
        model: Module called as ``model(text, text_lengths)``; it is switched
            to eval mode.
        iterator: Sized iterable of batches exposing ``text`` (a
            ``(tokens, lengths)`` pair) and ``label``.
        criterion: Loss callable ``criterion(predictions, labels)``.

    Returns:
        Tuple ``(mean_loss, mean_accuracy)``, each averaged over
        ``len(iterator)`` batches.
    """
    epoch_loss = 0
    epoch_acc = 0

    model.eval()

    with torch.no_grad():
        for batch in iterator:
            text, text_lengths = batch.text

            # Squeeze only the trailing dimension. A bare ``squeeze()`` also
            # removes the batch dimension when a batch holds a single
            # example, leaving a 0-d tensor that no longer matches
            # ``batch.label``.
            # NOTE(review): assumes the model output is (batch, 1) - confirm.
            predictions = model(text, text_lengths.cpu()).squeeze(-1)

            loss = criterion(predictions, batch.label)
            acc = binary_accuracy(predictions, batch.label)

            epoch_loss += loss.item()
            epoch_acc += acc.item()

    return epoch_loss / len(iterator), epoch_acc / len(iterator)
def evaluate(model, train_x, valid_x, criterion, optimizer):
    """Score ``model`` on one batch of variable-length token-id sequences.

    Sequences are right-padded with ``0`` to the length of the longest one.
    The padding is done on a copy, so the caller's lists are left untouched.

    Args:
        model: Callable applied to a ``torch.long`` tensor built from the
            padded sequences; dim 1 of its output is squeezed away.
        train_x: Non-empty list of token-id lists.
        valid_x: Targets, converted to a ``torch.float`` tensor.
        criterion: Loss callable ``criterion(predictions, targets)``.
        optimizer: Unused. Kept only so existing call sites keep working;
            evaluation must not modify the parameters.

    Returns:
        Tuple ``(loss, accuracy)`` as Python floats.

    Raises:
        ValueError: If ``train_x`` is empty.
    """
    longest = max(len(seq) for seq in train_x)
    padded = [list(seq) + [0] * (longest - len(seq)) for seq in train_x]

    inputs = torch.tensor(padded, dtype=torch.long)
    targets = torch.tensor(valid_x, dtype=torch.float)

    # No backward pass happens here, so gradients are not tracked and the
    # optimizer is not stepped. The previous ``optimizer.step()`` could still
    # move the weights through optimizer state such as momentum or weight
    # decay.
    with torch.no_grad():
        predictions = model(inputs).squeeze(1)
        loss = criterion(predictions, targets)
        acc = binary_accuracy(predictions, targets)

    return loss.item(), acc.item()
def inference(self):
    """Load the saved weights and print loss/accuracy on ``self.test_iter``.

    The state dict is read from ``self.config.save_model``; results are
    averaged over the number of batches in ``self.test_iter``.
    """
    loss_total, acc_total = 0, 0

    saved_state = torch.load(self.config.save_model)
    self.model.load_state_dict(saved_state)
    self.model.eval()

    with torch.no_grad():
        for batch in self.test_iter:
            tokens, token_lengths = batch.text
            scores = self.model(tokens, token_lengths).squeeze(1)
            loss_total += self.criterion(scores, batch.label).item()
            acc_total += binary_accuracy(scores, batch.label).item()

    test_loss = loss_total / len(self.test_iter)
    test_acc = acc_total / len(self.test_iter)
    print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc * 100:.2f}%')
def train(model, iterator, optimizer, criterion):
    """Run one training epoch of ``model`` over ``iterator``.

    Args:
        model: Module called as ``model(text, text_lengths)``; it is switched
            to train mode.
        iterator: Sized iterable of batches exposing ``text`` (a
            ``(tokens, lengths)`` pair) and ``label``.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss callable ``criterion(predictions, labels)``.

    Returns:
        Tuple ``(mean_loss, mean_accuracy)``, each averaged over
        ``len(iterator)`` batches.
    """
    epoch_loss = 0
    epoch_acc = 0

    model.train()

    for batch in iterator:
        optimizer.zero_grad()

        text, text_lengths = batch.text

        # Squeeze only the trailing dimension. A bare ``squeeze()`` also
        # removes the batch dimension when a batch holds a single example
        # (typically the last, short batch of an epoch), leaving a 0-d
        # tensor that no longer matches ``batch.label``.
        # NOTE(review): assumes the model output is (batch, 1) - confirm.
        predictions = model(text, text_lengths.cpu()).squeeze(-1)

        loss = criterion(predictions, batch.label)
        acc = binary_accuracy(predictions, batch.label)

        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        epoch_acc += acc.item()

    return epoch_loss / len(iterator), epoch_acc / len(iterator)
label_raw.append(label) print(len(user_raw)) user, neighbor, target, negative, label = torch.FloatTensor(user_raw), torch.FloatTensor(user_raw), torch.FloatTensor( target_raw), torch.FloatTensor(negative_raw), torch.FloatTensor(label_raw) dataset = Data.TensorDataset(user, neighbor, target, negative, label) dataloader = Data.DataLoader(dataset, batch_size=1024, shuffle=False) model = torch.load(model_save_path) model.to(device) model.eval() running_loss = 0.0 running_acc = 0.0 val_size = len(dataloader) criterion_binary = nn.BCELoss() for i, data in enumerate(dataloader): user, neighbor, target, negative, label = data user = user.to(device) neighbor = neighbor.to(device) target = target.to(device) negative = negative.to(device) label = label.to(device) # Forward pass output = model(user, neighbor, target, negative) # (batchsize, 1) # compute loss loss = criterion_binary(output.view(-1, 1), label.view(-1, 1)) running_loss += loss.item() running_acc += binary_accuracy(label, output.view(-1, 1)).item() * 100 torch.cuda.empty_cache() # 释放显存 print('ValLoss: {:.4f}, Acc_: {:.4f}'.format(running_loss / val_size, running_acc / val_size))
def main(unused_args):
    """Build the classifier/critic graph on MNIST and run adversarial training.

    The function:
      1. loads MNIST and picks the held-out split used for per-epoch
         validation,
      2. builds the classifier, the unrolled high-confidence attack
         ("generator") and the critic, together with their losses, metrics
         and summaries,
      3. optionally pre-trains the classifier, then alternates critic and
         classifier updates for ``FLAGS.niter`` epochs with periodic
         validation, learning-rate decay, sample dumps and checkpoints,
      4. always saves final checkpoints and logs metrics on the test split.

    Args:
        unused_args: Positional command-line arguments; must contain exactly
            one element.
    """
    assert len(unused_args) == 1, unused_args
    setup_experiment(logging, FLAGS, "critic_model")

    if FLAGS.validation:
        mnist_ds = mnist.read_data_sets(FLAGS.data_dir,
                                        dtype=tf.float32,
                                        reshape=False,
                                        validation_size=0)
        val_ds = mnist_ds.test
    else:
        mnist_ds = mnist.read_data_sets(FLAGS.data_dir,
                                        dtype=tf.float32,
                                        reshape=False,
                                        validation_size=FLAGS.validation_size)
        val_ds = mnist_ds.validation
    # val_ds is chosen by the branch above. It used to be overwritten here
    # with mnist_ds.validation, which is requested with validation_size=0
    # when FLAGS.validation is set.
    train_ds = mnist_ds.train
    test_ds = mnist_ds.test
    num_classes = FLAGS.num_classes

    img_shape = [None, 1, 28, 28]
    X = tf.placeholder(tf.float32, shape=img_shape, name='X')
    # placeholder to avoid recomputation of adversarial images for critic
    X_hat_h = tf.placeholder(tf.float32, shape=img_shape, name='X_hat')
    y = tf.placeholder(tf.int32, shape=[None], name='y')
    y_onehot = tf.one_hot(y, num_classes)
    # every axis except the batch axis
    reduce_ind = list(range(1, X.get_shape().ndims))
    # test/validation inputs
    X_v = tf.placeholder(tf.float32, shape=img_shape, name='X_v')
    y_v = tf.placeholder(tf.int32, shape=[None], name='y_v')
    y_v_onehot = tf.one_hot(y_v, num_classes)

    # classifier model
    model = create_model(FLAGS, name=FLAGS.model_name)

    def test_model(x, **kwargs):
        return model(x, train=False, **kwargs)

    # generator
    def generator(inputs, confidence, targets=None):
        return high_confidence_attack_unrolled(
            lambda x: model(x)['logits'],
            inputs,
            targets=targets,
            confidence=confidence,
            max_iter=FLAGS.attack_iter,
            over_shoot=FLAGS.attack_overshoot,
            attack_random=FLAGS.attack_random,
            attack_uniform=FLAGS.attack_uniform,
            attack_label_smoothing=FLAGS.attack_label_smoothing)

    def test_generator(inputs, confidence, targets=None):
        return high_confidence_attack(lambda x: test_model(x)['logits'],
                                      inputs,
                                      targets=targets,
                                      confidence=confidence,
                                      max_iter=FLAGS.df_iter,
                                      over_shoot=FLAGS.df_overshoot,
                                      random=FLAGS.attack_random,
                                      uniform=FLAGS.attack_uniform,
                                      clip_dist=FLAGS.df_clip)

    # discriminator
    critic = create_model(FLAGS, prefix='critic_', name='critic')

    # classifier outputs
    outs_x = model(X)
    outs_x_v = test_model(X_v)

    # Snapshot of the variables existing before the critic is applied; the
    # critic's own variables are obtained later by set difference.
    params = tf.trainable_variables()
    model_weights = [param for param in params if "weights" in param.name]
    model_vars = tf.model_variables()
    # one-element list so the nested function below can publish the tensor
    target_conf_v = [None]

    if FLAGS.attack_confidence == "same":
        # set the target confidence to the confidence of the original
        # prediction
        target_confidence = outs_x['conf']
        target_conf_v[0] = target_confidence
    elif FLAGS.attack_confidence == "class_running_mean":
        # set the target confidence to the mean confidence of the specific
        # target; use running mean estimate
        class_conf_mean = tf.Variable(np.ones(num_classes, dtype=np.float32))
        batch_conf_mean = tf.unsorted_segment_mean(outs_x['conf'],
                                                   outs_x['pred'],
                                                   num_classes)
        # if batch does not contain predictions for the specific target
        # (zeroes), replace zeroes with stored class mean (previous batch)
        batch_conf_mean = tf.where(tf.not_equal(batch_conf_mean, 0),
                                   batch_conf_mean, class_conf_mean)
        # update class confidence mean
        class_conf_mean = assign_moving_average(class_conf_mean,
                                                batch_conf_mean, 0.5)
        # init class confidence during pre-training
        tf.add_to_collection("PREINIT_OPS", class_conf_mean)

        def target_confidence(targets_onehot):
            targets = tf.argmax(targets_onehot, axis=1)
            check_conf = tf.Assert(
                tf.reduce_all(tf.not_equal(class_conf_mean, 0)),
                [class_conf_mean])
            with tf.control_dependencies([check_conf]):
                t = tf.gather(class_conf_mean, targets)
            target_conf_v[0] = t
            return tf.stop_gradient(t)
    else:
        target_confidence = float(FLAGS.attack_confidence)
        target_conf_v[0] = target_confidence

    X_hat = generator(X, target_confidence)
    outs_x_hat = model(X_hat)
    # select examples for which attack succeeded (changed the prediction)
    X_hat_filter = tf.not_equal(outs_x['pred'], outs_x_hat['pred'])
    X_hat_f = tf.boolean_mask(X_hat, X_hat_filter)
    X_f = tf.boolean_mask(X, X_hat_filter)

    outs_x_f = model(X_f)
    outs_x_hat_f = model(X_hat_f)

    X_hatd = tf.stop_gradient(X_hat)
    X_rec = generator(X_hatd, outs_x['conf'], outs_x['pred'])
    X_rec_f = tf.boolean_mask(X_rec, X_hat_filter)

    # validation/test adversarial examples
    X_v_hat = test_generator(X_v, FLAGS.val_attack_confidence)
    X_v_hatd = tf.stop_gradient(X_v_hat)
    X_v_rec = test_generator(X_v_hatd,
                             outs_x_v['conf'],
                             targets=outs_x_v['pred'])
    X_v_hat_df = deepfool(lambda x: test_model(x)['logits'],
                          X_v,
                          y_v,
                          max_iter=FLAGS.df_iter,
                          clip_dist=FLAGS.df_clip)
    X_v_hat_df_all = deepfool(lambda x: test_model(x)['logits'],
                              X_v,
                              max_iter=FLAGS.df_iter,
                              clip_dist=FLAGS.df_clip)

    y_hat = outs_x['pred']
    y_adv = outs_x_hat['pred']
    y_adv_f = outs_x_hat_f['pred']

    tf.summary.histogram('y_data', y, collections=["model_summaries"])
    tf.summary.histogram('y_hat', y_hat, collections=["model_summaries"])
    tf.summary.histogram('y_adv', y_adv, collections=["model_summaries"])

    # critic outputs
    critic_outs_x = critic(X)
    critic_outs_x_hat = critic(X_hat_f)
    critic_params = list(set(tf.trainable_variables()) - set(params))
    critic_vars = list(set(tf.trainable_variables()) - set(model_vars))

    # binary logits for a specific target: pick, for every example, the
    # entry of the flattened logits at index (row * num_classes + label)
    logits_data = critic_outs_x['logits']
    logits_data_flt = tf.reshape(logits_data, (-1, ))
    z_data = tf.gather(logits_data_flt,
                       tf.range(tf.shape(X)[0]) * num_classes + y)
    logits_adv = critic_outs_x_hat['logits']
    logits_adv_flt = tf.reshape(logits_adv, (-1, ))
    z_adv = tf.gather(logits_adv_flt,
                      tf.range(tf.shape(X_hat_f)[0]) * num_classes + y_adv_f)

    # classifier/generator losses
    nll = tf.reduce_mean(
        tf.losses.softmax_cross_entropy(y_onehot, outs_x['logits']))
    nll_v = tf.reduce_mean(
        tf.losses.softmax_cross_entropy(y_v_onehot, outs_x_v['logits']))
    # gan losses
    gan = tf.losses.sigmoid_cross_entropy(tf.ones_like(z_adv), z_adv)
    rec_l1 = tf.reduce_mean(
        tf.reduce_sum(tf.abs(X_f - X_rec_f), axis=reduce_ind))
    rec_l2 = tf.reduce_mean(tf.reduce_sum((X_f - X_rec_f)**2, axis=reduce_ind))

    weight_decay = slim.apply_regularization(slim.l2_regularizer(1.0),
                                             model_weights[:-1])
    pretrain_loss = nll + 5e-6 * weight_decay
    loss = nll + FLAGS.lmbd * gan
    if FLAGS.lmbd_rec_l1 > 0:
        loss += FLAGS.lmbd_rec_l1 * rec_l1
    if FLAGS.lmbd_rec_l2 > 0:
        loss += FLAGS.lmbd_rec_l2 * rec_l2
    if FLAGS.weight_decay > 0:
        loss += FLAGS.weight_decay * weight_decay

    # critic loss
    critic_gan_data = tf.losses.sigmoid_cross_entropy(tf.ones_like(z_data),
                                                      z_data)
    # use placeholder for X_hat to avoid recomputation of adversarial noise
    y_adv_h = model(X_hat_h)['pred']
    logits_adv_h = critic(X_hat_h)['logits']
    logits_adv_flt_h = tf.reshape(logits_adv_h, (-1, ))
    z_adv_h = tf.gather(logits_adv_flt_h,
                        tf.range(tf.shape(X_hat_h)[0]) * num_classes + y_adv_h)
    critic_gan_adv = tf.losses.sigmoid_cross_entropy(tf.zeros_like(z_adv_h),
                                                     z_adv_h)
    critic_gan = critic_gan_data + critic_gan_adv

    # Gulrajani discriminator regularizer (we do not interpolate)
    critic_grad_data = tf.gradients(z_data, X)[0]
    critic_grad_adv = tf.gradients(z_adv_h, X_hat_h)[0]
    critic_grad_penalty = norm_penalty(critic_grad_adv) + norm_penalty(
        critic_grad_data)
    critic_loss = critic_gan + FLAGS.lmbd_grad * critic_grad_penalty

    # classifier model_metrics
    err = 1 - slim.metrics.accuracy(outs_x['pred'], y)
    conf = tf.reduce_mean(outs_x['conf'])
    err_hat = 1 - slim.metrics.accuracy(
        test_model(X_hat)['pred'], outs_x['pred'])
    err_hat_f = 1 - slim.metrics.accuracy(
        test_model(X_hat_f)['pred'], outs_x_f['pred'])
    err_rec = 1 - slim.metrics.accuracy(
        test_model(X_rec)['pred'], outs_x['pred'])
    conf_hat = tf.reduce_mean(test_model(X_hat)['conf'])
    conf_hat_f = tf.reduce_mean(test_model(X_hat_f)['conf'])
    conf_rec = tf.reduce_mean(test_model(X_rec)['conf'])
    err_v = 1 - slim.metrics.accuracy(outs_x_v['pred'], y_v)
    conf_v_hat = tf.reduce_mean(test_model(X_v_hat)['conf'])
    l2_hat = tf.sqrt(tf.reduce_sum((X_f - X_hat_f)**2, axis=reduce_ind))
    tf.summary.histogram('l2_hat', l2_hat, collections=["model_summaries"])

    # critic model_metrics
    critic_err_data = 1 - binary_accuracy(
        z_data, tf.ones(tf.shape(z_data), tf.bool), 0.0)
    critic_err_adv = 1 - binary_accuracy(
        z_adv, tf.zeros(tf.shape(z_adv), tf.bool), 0.0)

    # validation model_metrics
    err_df = 1 - slim.metrics.accuracy(test_model(X_v_hat_df)['pred'], y_v)
    err_df_all = 1 - slim.metrics.accuracy(
        test_model(X_v_hat_df_all)['pred'], outs_x_v['pred'])
    l2_v_hat = tf.sqrt(tf.reduce_sum((X_v - X_v_hat)**2, axis=reduce_ind))
    l2_v_rec = tf.sqrt(tf.reduce_sum((X_v - X_v_rec)**2, axis=reduce_ind))
    l1_v_rec = tf.reduce_sum(tf.abs(X_v - X_v_rec), axis=reduce_ind)
    l2_df = tf.sqrt(tf.reduce_sum((X_v - X_v_hat_df)**2, axis=reduce_ind))
    l2_df_norm = l2_df / tf.sqrt(tf.reduce_sum(X_v**2, axis=reduce_ind))
    l2_df_all = tf.sqrt(
        tf.reduce_sum((X_v - X_v_hat_df_all)**2, axis=reduce_ind))
    l2_df_norm_all = l2_df_all / tf.sqrt(
        tf.reduce_sum(X_v**2, axis=reduce_ind))
    tf.summary.histogram('l2_df', l2_df, collections=["adv_summaries"])
    tf.summary.histogram('l2_df_norm',
                         l2_df_norm,
                         collections=["adv_summaries"])

    # model_metrics
    pretrain_model_metrics = OrderedDict([('nll', nll),
                                          ('weight_decay', weight_decay),
                                          ('err', err)])
    model_metrics = OrderedDict([('loss', loss), ('nll', nll),
                                 ('l2_hat', tf.reduce_mean(l2_hat)),
                                 ('gan', gan), ('rec_l1', rec_l1),
                                 ('rec_l2', rec_l2),
                                 ('weight_decay', weight_decay),
                                 ('err', err), ('conf', conf),
                                 ('err_hat', err_hat),
                                 ('err_hat_f', err_hat_f),
                                 ('conf_t', tf.reduce_mean(target_conf_v[0])),
                                 ('conf_hat', conf_hat),
                                 ('conf_hat_f', conf_hat_f),
                                 ('err_rec', err_rec),
                                 ('conf_rec', conf_rec)])
    critic_metrics = OrderedDict([('c_loss', critic_loss),
                                  ('c_gan', critic_gan),
                                  ('c_gan_data', critic_gan_data),
                                  ('c_gan_adv', critic_gan_adv),
                                  ('c_grad_norm', critic_grad_penalty),
                                  ('c_err_adv', critic_err_adv),
                                  ('c_err_data', critic_err_data)])
    val_metrics = OrderedDict([('nll', nll_v), ('err', err_v)])
    adv_metrics = OrderedDict([('l2_df', tf.reduce_mean(l2_df)),
                               ('l2_df_norm', tf.reduce_mean(l2_df_norm)),
                               ('l2_df_all', tf.reduce_mean(l2_df_all)),
                               ('l2_df_all_norm',
                                tf.reduce_mean(l2_df_norm_all)),
                               ('l2_hat', tf.reduce_mean(l2_v_hat)),
                               ('conf_hat', conf_v_hat),
                               ('l1_rec', tf.reduce_mean(l1_v_rec)),
                               ('l2_rec', tf.reduce_mean(l2_v_rec)),
                               ('err_df', err_df),
                               ('err_df_all', err_df_all)])

    pretrain_metric_mean, pretrain_metric_upd = register_metrics(
        pretrain_model_metrics, collections="pretrain_model_summaries")
    metric_mean, metric_upd = register_metrics(model_metrics,
                                               collections="model_summaries")
    critic_metric_mean, critic_metric_upd = register_metrics(
        critic_metrics, collections="critic_summaries")
    val_metric_mean, val_metric_upd = register_metrics(
        val_metrics, prefix="val_", collections="val_summaries")
    adv_metric_mean, adv_metric_upd = register_metrics(
        adv_metrics, collections="adv_summaries")
    metrics_reset = tf.variables_initializer(tf.local_variables())

    # training ops
    lr = tf.Variable(FLAGS.lr, trainable=False)
    critic_lr = tf.Variable(FLAGS.critic_lr, trainable=False)
    tf.summary.scalar('lr', lr, collections=["model_summaries"])
    tf.summary.scalar('critic_lr', critic_lr, collections=["critic_summaries"])

    optimizer = tf.train.AdamOptimizer(learning_rate=lr, beta1=0.5)

    preinit_ops = tf.get_collection("PREINIT_OPS")
    # NOTE(review): only the pre-training op is placed under the PREINIT
    # control dependencies here; the original nesting of `solver` was
    # ambiguous - confirm.
    with tf.control_dependencies(preinit_ops):
        pretrain_solver = optimizer.minimize(pretrain_loss, var_list=params)
    solver = optimizer.minimize(loss, var_list=params)
    critic_solver = (tf.train.AdamOptimizer(
        learning_rate=critic_lr, beta1=0.5).minimize(critic_loss,
                                                     var_list=critic_params))

    # train
    summary_images, summary_labels = select_balanced_subset(
        train_ds.images, train_ds.labels, num_classes, num_classes)
    summary_images = summary_images.transpose((0, 3, 1, 2))
    save_path = os.path.join(FLAGS.samples_dir, 'orig.png')
    save_images(summary_images, save_path)

    if FLAGS.gpu_memory < 1.0:
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=FLAGS.gpu_memory)
        config = tf.ConfigProto(gpu_options=gpu_options)
    else:
        config = None

    # Bound up front so the final report below works even when neither the
    # pre-training nor the training loop runs a single epoch.
    epoch = 0

    with tf.Session(config=config) as sess:
        try:
            # summaries
            summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)
            summaries = tf.summary.merge_all("model_summaries")
            critic_summaries = tf.summary.merge_all("critic_summaries")
            val_summaries = tf.summary.merge_all("val_summaries")
            adv_summaries = tf.summary.merge_all("adv_summaries")

            # initialization
            tf.local_variables_initializer().run()
            tf.global_variables_initializer().run()

            # pretrain model
            if FLAGS.pretrain_niter > 0:
                logging.info("Model pretraining")
                for epoch in range(1, FLAGS.pretrain_niter + 1):
                    train_iterator = batch_iterator(train_ds.images,
                                                    train_ds.labels,
                                                    FLAGS.batch_size,
                                                    shuffle=True)
                    sess.run(metrics_reset)

                    start_time = time.time()
                    for ind, (images, labels) in enumerate(train_iterator):
                        sess.run([pretrain_solver, pretrain_metric_upd],
                                 feed_dict={
                                     X: images,
                                     y: labels
                                 })

                    str_bfr = six.StringIO()
                    str_bfr.write("Pretrain epoch [{}, {:.2f}s]:".format(
                        epoch,
                        time.time() - start_time))
                    print_results_str(str_bfr, pretrain_model_metrics.keys(),
                                      sess.run(pretrain_metric_mean))
                    print_results_str(str_bfr, critic_metrics.keys(),
                                      sess.run(critic_metric_mean))
                    logging.info(str_bfr.getvalue()[:-1])

            # training
            for epoch in range(1, FLAGS.niter + 1):
                train_iterator = batch_iterator(train_ds.images,
                                                train_ds.labels,
                                                FLAGS.batch_size,
                                                shuffle=True)
                sess.run(metrics_reset)

                start_time = time.time()
                for ind, (images, labels) in enumerate(train_iterator):
                    batch_index = (epoch - 1) * (train_ds.images.shape[0] //
                                                 FLAGS.batch_size) + ind
                    # train critic for several steps; adversarial images are
                    # computed once and fed back through X_hat_h
                    X_hat_np = sess.run(X_hat, feed_dict={X: images})
                    critic_feed = {X: images, y: labels, X_hat_h: X_hat_np}
                    for _ in range(FLAGS.critic_steps - 1):
                        sess.run([critic_solver], feed_dict=critic_feed)
                    # The last critic step also updates metrics and emits a
                    # summary. This used to be the `else` of the loop above,
                    # which always ran because the loop has no `break`.
                    summary = sess.run(
                        [critic_solver, critic_metric_upd, critic_summaries],
                        feed_dict=critic_feed)[-1]
                    summary_writer.add_summary(summary, batch_index)
                    # train model
                    summary = sess.run([solver, metric_upd, summaries],
                                       feed_dict={
                                           X: images,
                                           y: labels
                                       })[-1]
                    summary_writer.add_summary(summary, batch_index)

                str_bfr = six.StringIO()
                str_bfr.write("Train epoch [{}, {:.2f}s]:".format(
                    epoch,
                    time.time() - start_time))
                print_results_str(str_bfr, model_metrics.keys(),
                                  sess.run(metric_mean))
                print_results_str(str_bfr, critic_metrics.keys(),
                                  sess.run(critic_metric_mean))
                logging.info(str_bfr.getvalue()[:-1])

                val_iterator = batch_iterator(val_ds.images,
                                              val_ds.labels,
                                              100,
                                              shuffle=False)
                for images, labels in val_iterator:
                    summary = sess.run([val_metric_upd, val_summaries],
                                       feed_dict={
                                           X_v: images,
                                           y_v: labels
                                       })[-1]
                    summary_writer.add_summary(summary, epoch)
                str_bfr = six.StringIO()
                str_bfr.write("Valid epoch [{}]:".format(epoch))
                print_results_str(str_bfr, val_metrics.keys(),
                                  sess.run(val_metric_mean))
                logging.info(str_bfr.getvalue()[:-1])

                # learning rate decay
                update_lr = lr_decay(lr, epoch)
                if update_lr is not None:
                    sess.run(update_lr)
                    logging.debug(
                        "learning rate was updated to: {:.10f}".format(
                            lr.eval()))
                critic_update_lr = lr_decay(critic_lr, epoch, prefix='critic_')
                if critic_update_lr is not None:
                    sess.run(critic_update_lr)
                    logging.debug(
                        "critic learning rate was updated to: {:.10f}".format(
                            critic_lr.eval()))

                if epoch % FLAGS.summary_frequency == 0:
                    # [:-1] drops the result of the metric-update op
                    samples_hat, samples_rec, samples_df, summary = sess.run(
                        [
                            X_v_hat, X_v_rec, X_v_hat_df, adv_summaries,
                            adv_metric_upd
                        ],
                        feed_dict={
                            X_v: summary_images,
                            y_v: summary_labels
                        })[:-1]
                    summary_writer.add_summary(summary, epoch)
                    save_path = os.path.join(FLAGS.samples_dir,
                                             'epoch_orig-%d.png' % epoch)
                    save_images(summary_images, save_path)
                    save_path = os.path.join(FLAGS.samples_dir,
                                             'epoch-%d.png' % epoch)
                    save_images(samples_hat, save_path)
                    save_path = os.path.join(FLAGS.samples_dir,
                                             'epoch_rec-%d.png' % epoch)
                    save_images(samples_rec, save_path)
                    save_path = os.path.join(FLAGS.samples_dir,
                                             'epoch_df-%d.png' % epoch)
                    save_images(samples_df, save_path)

                    str_bfr = six.StringIO()
                    str_bfr.write("Summary epoch [{}]:".format(epoch))
                    print_results_str(str_bfr, adv_metrics.keys(),
                                      sess.run(adv_metric_mean))
                    logging.info(str_bfr.getvalue()[:-1])

                if (FLAGS.checkpoint_frequency != -1
                        and epoch % FLAGS.checkpoint_frequency == 0):
                    save_checkpoint(sess, model_vars, epoch=epoch)
                    save_checkpoint(sess,
                                    critic_vars,
                                    name="critic_model",
                                    epoch=epoch)
        except KeyboardInterrupt:
            logging.debug("Keyboard interrupt. Stopping training...")
        except NanError as e:
            logging.info(e)
        finally:
            sess.run(metrics_reset)
            save_checkpoint(sess, model_vars)
            save_checkpoint(sess, critic_vars, name="critic_model")

        # final accuracy
        test_iterator = batch_iterator(test_ds.images,
                                       test_ds.labels,
                                       100,
                                       shuffle=False)
        for images, labels in test_iterator:
            sess.run([val_metric_upd], feed_dict={X_v: images, y_v: labels})
        str_bfr = six.StringIO()
        str_bfr.write("Final epoch [{}]:".format(epoch))
        for metric_name, metric_value in zip(val_metrics.keys(),
                                             sess.run(val_metric_mean)):
            str_bfr.write(" {}: {:.6f},".format(metric_name, metric_value))
        logging.info(str_bfr.getvalue()[:-1])
) path='tut2-model.pt' model.load_state_dict(torch.load(path)) model = model.to(device) model.eval() sbert_model = SentenceTransformer('bert-base-nli-mean-tokens') i=0 epoch_acc = 0 mod_epoch_acc = 0 for batch in test_iter: ip, ip_len = batch.text label = batch.label ori_op = model(ip, ip_len).squeeze() acc = binary_accuracy(ori_op, label) epoch_acc += acc.item() # print(ori_op.item)) ranking, ip_text = get_ranking(ip, model, ori_op) # print(ip_text) modified_text = replace_with_synonyms(0.5, model, ranking, sbert_model, ip_text, label) # import pdb; pdb.set_trace() mod_op = predict(model, modified_text, vocab) mod_acc = binary_accuracy(torch.tensor(mod_op, device=device), label) mod_epoch_acc += mod_acc.item() print("Test accuracy " + epoch_acc / len(test_iter)) print("Test accuracy after attack using glove embedding" + mod_epoch_acc / len(test_iter)) # break
print('test_fold_id iii:', i) # 데이터로 쓸 revs는 여기서 들어감 train_loader, test_loader = Kfold_Split(revs, word_idx_map, test_fold_id=i) ##------------------------- 그때그때 모델을 만들어주고 돌아가도록 해야지 cheating이 없다. model.train() optimizer.zero_grad() ##------------------------- 그래서 얘네 둘이 for loop 위에 있다. ## Training for epoch in range(p_epochs): for idx, data in enumerate(train_loader): outputs = model(data['input_ids'].to(device)) loss = criterion(outputs, data['target'].type_as(outputs)) # loss = criterion(outputs, data['target'].type_as(outputs).squeeze(-1)) # tanh acc = binary_accuracy(outputs, data['target'].type_as(outputs)) loss.backward() # if tpu: # xm.optimizer_step(optimizer) optimizer.step() if idx % 50 == 0: print( 'epoch:', epoch, ' current acc: {:.3f}'.format( float(acc.data.cpu().numpy()))) print('training is done!') ## test total = len(test_loader) correct = 0 model.eval()
opt = train_config.optimizer(model.parameters(), **train_config.o_kwargs) print(str(model_config) + "\n") print(str(train_config) + "\n") iterations = 0 start = time.time() best_dev_acc = -1 train_iter.repeat = False header = ' Time Epoch Iteration Progress (%Epoch) Loss Accuracy' log_template = ' '.join( '{:>6.0f},{:>5.0f},{:>9.0f},{:>5.0f}/{:<5.0f} {:>7.0f}%,{:>8.6f},{:12.4f}'. split(',')) print(header) for epoch in range(train_config.epochs): train_iter.init_epoch() for batch_idx, batch in enumerate(train_iter): model.train() opt.zero_grad() iterations += 1 answer = model(batch) train_acc = binary_accuracy(answer, batch.label) print(train_acc) break break
def train(model, data_loaders_dict, optimizer, loss_fn, config, device):
    """Run the train/validation loop, checkpointing on best validation accuracy.

    When ``config['LOAD_CHECKPOINT']`` is truthy, the metric history is
    restored from ``config['PATH_METRIC']`` and training resumes at the epoch
    after the last recorded one.

    Args:
        model: Module called as ``model(ids, mask=..., token_type_ids=...)``.
        data_loaders_dict: Mapping with ``'train'`` and ``'val'`` loaders
            whose batches are dicts with ``ids``, ``mask``,
            ``token_type_ids`` and ``targets``.
        optimizer: Optimizer stepped once per training batch.
        loss_fn: Loss callable ``loss_fn(outputs, targets)``.
        config: Mapping read for ``LOAD_CHECKPOINT``, ``PATH_METRIC``,
            ``PATH_MODEL`` and ``EPOCHS``.
        device: Device the batch tensors are moved to.
    """
    train_loss_list, val_loss_list = [], []
    train_acc_list, val_acc_list = [], []
    best_valid_acc = float("-inf")

    if config['LOAD_CHECKPOINT']:
        history = load_metric(config['PATH_METRIC'])
        train_loss_list = history['train_loss_list']
        val_loss_list = history['val_loss_list']
        train_acc_list = history['train_acc_list']
        val_acc_list = history['valid_acc_list']
        best_valid_acc = history['best_valid_acc']

    # Resume right after the last recorded epoch (0 for a fresh run).
    first_epoch = len(train_loss_list)

    for epoch in range(first_epoch, config['EPOCHS']):
        # Every epoch runs two phases: training, then validation.
        for phase in ('train', 'val'):
            is_training = phase == 'train'
            if is_training:
                model.train()
            else:
                model.eval()

            loader = data_loaders_dict[phase]
            loss_sum = 0.0
            acc_sum = 0.0

            for batch in tqdm.tqdm(loader):
                ids = batch["ids"].to(device, dtype=torch.long)
                mask = batch["mask"].to(device, dtype=torch.long)
                token_type_ids = batch["token_type_ids"].to(
                    device, dtype=torch.long)
                targets = batch["targets"].to(device, dtype=torch.float)

                # Clear gradients accumulated by the previous step.
                optimizer.zero_grad()

                with torch.set_grad_enabled(is_training):
                    outputs = model(
                        ids, mask=mask, token_type_ids=token_type_ids)
                    loss_value = loss_fn(outputs, targets)
                    loss_sum += loss_value.item()
                    acc_sum += binary_accuracy(outputs, targets).item()

                    # Back-propagate and update weights only while training.
                    if is_training:
                        loss_value.backward()
                        optimizer.step()

            sample_count = len(loader.dataset)
            epoch_loss = loss_sum / sample_count
            epoch_acc = acc_sum / sample_count

            print("Epoch {}/{} | {:^5} | Loss: {:.4f} | Acc: {:.2f} ".format(
                epoch + 1, config['EPOCHS'], phase, epoch_loss, epoch_acc))

            if is_training:
                train_loss_list.append(epoch_loss)
                train_acc_list.append(epoch_acc)
                continue

            val_loss_list.append(epoch_loss)
            val_acc_list.append(epoch_acc)
            if best_valid_acc < epoch_acc:
                best_valid_acc = epoch_acc
                save_checkpoint(config['PATH_MODEL'], model, optimizer,
                                best_valid_acc)
            save_metric(config['PATH_METRIC'], train_loss_list, val_loss_list,
                        train_acc_list, val_acc_list, best_valid_acc)