def logit(x, is_training=True, update_batch_stats=True, stochastic=True, seed=1234):
    return cnn.logit(x, is_training=is_training,
                     update_batch_stats=update_batch_stats,
                     stochastic=stochastic,
                     seed=seed)[0]
def generate_virtual_adversarial_dropout_mask(x, logit, is_training=True):
    # Forward pass with a freshly sampled dropout mask; also return that mask.
    logit_m, init_mask = CNN.logit(x, None, is_training=True,
                                   update_batch_stats=False,
                                   stochastic=True, seed=1234)
    # Divergence between the masked prediction and the reference prediction.
    dist = L.kl_divergence_with_logit(logit_m, logit)
    # Gradient of the divergence w.r.t. the mask, treated as a constant.
    mask_grad = tf.stop_gradient(
        tf.gradients(dist, [init_mask], aggregation_method=2)[0])
    # Flip mask entries in the direction that increases the divergence.
    return flipping_algorithm(init_mask, mask_grad)
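# The helper `flipping_algorithm` is referenced above but not shown here. Below
# is a minimal sketch of what such a step could look like, assuming a binary
# 0/1 dropout mask and a flip budget `delta` (fraction of units allowed to
# change); the function name, `delta`, and the tie-breaking are illustrative
# assumptions, not the repository's actual implementation.
import tensorflow as tf

def flipping_algorithm_sketch(init_mask, mask_grad, delta=0.05):
    batch = tf.shape(init_mask)[0]
    flat_mask = tf.reshape(init_mask, [batch, -1])
    flat_grad = tf.reshape(mask_grad, [batch, -1])
    # A flip helps when the gradient points away from the current mask value:
    #   mask == 1 and grad < 0 -> dropping the unit raises the divergence
    #   mask == 0 and grad > 0 -> activating the unit raises the divergence
    flip_gain = tf.where(flat_mask > 0.5, -flat_grad, flat_grad)
    num_units = tf.shape(flat_mask)[1]
    budget = tf.maximum(
        tf.cast(tf.cast(num_units, tf.float32) * delta, tf.int32), 1)
    # Flip only the `budget` most helpful units per example.
    threshold = tf.reduce_min(
        tf.nn.top_k(flip_gain, k=budget).values, axis=1, keepdims=True)
    do_flip = tf.logical_and(flip_gain >= threshold, flip_gain > 0.)
    adv_mask = tf.where(do_flip, 1. - flat_mask, flat_mask)
    return tf.reshape(adv_mask, tf.shape(init_mask))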
def logit(x, masks=None, is_training=True, update_batch_stats=True, stochastic=True, seed=1234):
    logits, _ = CNN.logit(x, masks, is_training=is_training,
                          update_batch_stats=update_batch_stats,
                          stochastic=stochastic, seed=seed)
    return logits
def virtual_adversarial_dropout_loss(x, logit, is_training=True, name="vadt_loss"):
    # Find the dropout mask that most increases the divergence from `logit`.
    adv_mask = generate_virtual_adversarial_dropout_mask(
        x, logit, is_training=is_training)
    logit_p = logit
    # Re-run the network under the adversarial mask.
    logit_m, _ = CNN.logit(x, adv_mask, is_training=True,
                           update_batch_stats=True,
                           stochastic=True, seed=1234)
    # Penalize the divergence between the clean and adversarially-masked predictions.
    loss = L.kl_divergence_with_logit(logit_p, logit_m)
    return tf.identity(loss, name=name)
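# Usage sketch (not from the repository): combining a supervised cross-entropy
# term on labeled data with the virtual adversarial dropout loss on unlabeled
# data. The function name, the weighting coefficient `lamb`, and the scoping
# details are assumptions for illustration; variable reuse follows the TF1
# pattern used elsewhere in this code.
import tensorflow as tf

def example_semi_supervised_objective(x_labeled, y_labeled, x_unlabeled, lamb=1.0):
    # Supervised cross-entropy on the labeled batch.
    logit_l = logit(x_labeled, is_training=True, update_batch_stats=True)
    ce_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_labeled, logits=logit_l))
    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
        # Reference prediction on the unlabeled batch, treated as a constant target.
        logit_ul = tf.stop_gradient(
            logit(x_unlabeled, is_training=True, update_batch_stats=False))
        vadt = virtual_adversarial_dropout_loss(x_unlabeled, logit_ul,
                                                is_training=True)
    return ce_loss + lamb * vadt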
def logit(x, is_training=True, update_batch_stats=True, stochastic=True, seed=1234,
          dropout_mask=None, return_mask=False, h_before_dropout=None):
    return cnn.logit(x, is_training=is_training,
                     update_batch_stats=update_batch_stats,
                     stochastic=stochastic,
                     seed=seed,
                     dropout_mask=dropout_mask,
                     return_mask=return_mask,
                     h_before_dropout=h_before_dropout)
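# Usage sketch (assumed semantics of the keyword arguments): with
# `return_mask=True` the wrapper presumably also yields the sampled dropout
# mask, which can then be fed back through `dropout_mask` so that a second
# forward pass runs under a chosen mask. The tuple return, the image shape,
# and the variable names are assumptions, not the repository's documented API.
import tensorflow as tf

x_example = tf.placeholder(tf.float32, [None, 32, 32, 3])
logits_a, mask = logit(x_example, stochastic=True, return_mask=True)
logits_b = logit(x_example, stochastic=True, dropout_mask=mask)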
def build_training_graph(x1, y1, x2, lr, mom):
    """Build the training graph: mix the labeled batch x1 with the
    pseudo-labeled unlabeled batch x2 using a Beta(0.1, 0.1) coefficient,
    train the classifier on the interpolated samples, and add a two-way
    domain discriminator behind a gradient-reversal layer whose targets are
    the mixing coefficients."""
    global_step = tf.get_variable(
        name="global_step",
        shape=[],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0.0),
        trainable=False,
    )
    k = 1. * global_step / (FLAGS.num_iter_per_epoch * FLAGS.num_epochs)
    # lp schedule from GRL
    lp = (2. / (1. + tf.exp(-10. * k)) - 1)

    # Interpolation: pseudo-label the unlabeled batch, then mix inputs and targets.
    y2_logit, _ = cnn.logit(x2, is_training=False,
                            update_batch_stats=False,
                            stochastic=False)
    if FLAGS.one_hot:
        y2 = tf.stop_gradient(
            tf.cast(tf.one_hot(tf.argmax(y2_logit, -1), 10), tf.float32))
    else:
        y2 = tf.stop_gradient(tf.nn.softmax(y2_logit))
    dist_beta = tf.distributions.Beta(0.1, 0.1)
    lmb = dist_beta.sample(tf.shape(x1)[0])
    lmb_x = tf.reshape(lmb, [-1, 1, 1, 1])
    lmb_y = tf.reshape(lmb, [-1, 1])
    x = x1 * lmb_x + x2 * (1. - lmb_x)
    y = y1 * lmb_y + y2 * (1. - lmb_y)
    label_dm = tf.concat(
        [tf.reshape(lmb, [-1, 1]), tf.reshape(1. - lmb, [-1, 1])], axis=1)

    # Calculate the feats and logits on interpolated samples
    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
        logit, net = cnn.logit(x, is_training=True, update_batch_stats=True)

    # Alignment loss: domain discriminator behind the gradient-reversal layer.
    net_ = flip_gradient(net, lp)
    logitsdm = tf.layers.dense(net_, 1024, activation=tf.nn.relu, name='linear_dm1')
    logitsdm = tf.layers.dense(logitsdm, 1024, activation=tf.nn.relu, name='linear_dm2')
    logits_dm = tf.layers.dense(logitsdm, 2, name="logits_dm")
    dm_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=label_dm, logits=logits_dm))
    additional_loss = dm_loss

    # Classification loss on the mixed samples, weighted by the mixing coefficient.
    nll_loss = tf.reduce_mean(
        lmb * tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logit))
    loss = nll_loss + additional_loss

    opt = tf.train.AdamOptimizer(learning_rate=lr, beta1=mom)
    tvars = tf.trainable_variables()
    grads_and_vars = opt.compute_gradients(loss, tvars)
    train_op = opt.apply_gradients(grads_and_vars, global_step=global_step)
    return loss, train_op, global_step
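# Usage sketch (not from the repository): wiring the training graph to
# placeholders and running a step. Image shape, number of classes, and the fed
# values are assumptions; FLAGS.num_iter_per_epoch, FLAGS.num_epochs, and
# FLAGS.one_hot are expected to be defined by the repository's flag setup.
import tensorflow as tf

x1_ph = tf.placeholder(tf.float32, [None, 32, 32, 3])  # labeled images
y1_ph = tf.placeholder(tf.float32, [None, 10])         # one-hot labels
x2_ph = tf.placeholder(tf.float32, [None, 32, 32, 3])  # unlabeled images
lr_ph = tf.placeholder(tf.float32, [])                 # learning rate
mom_ph = tf.placeholder(tf.float32, [])                # Adam beta1

loss, train_op, global_step = build_training_graph(x1_ph, y1_ph, x2_ph, lr_ph, mom_ph)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Feed a batch from the data pipeline (omitted here), e.g.:
    # _, loss_val = sess.run([train_op, loss],
    #                        feed_dict={x1_ph: ..., y1_ph: ..., x2_ph: ...,
    #                                   lr_ph: 1e-3, mom_ph: 0.9})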