def extract_glimpses(self, images, locations):
    """Extracts fovea-like glimpses.

    Args:
      images: 4-D Tensor of shape [batch, height, width, channels].
      locations: 2-D Tensor of shape [batch, 2] with glimpse locations.
        Locations are in the interval [-1, 1], where:
          (-1, -1): upper left corner.
          (-1, 1): upper right corner.
          (1, 1): lower right corner.
          (1, -1): lower left corner.

    Returns:
      A list of `num_resolutions` 4-D tensors, each of shape
        [batch, glimpse_height, glimpse_width, channels].
    """
    # Get multi-resolution fields of view (the first is full resolution).
    image_shape = tf.cast(tf.shape(images)[1:3], dtype=tf.float32)
    start = tf.cast(self.glimpse_shape[0], dtype=tf.float32) / image_shape[0]
    fields_of_view = tf.cast(
        tf.lin_space(start, 1., self.num_resolutions), dtype=tf.float32)
    receptive_fields = [self.glimpse_shape] + [
        tf.cast(fields_of_view[i] * image_shape, dtype=tf.int32)
        for i in range(1, self.num_resolutions)
    ]

    images_glimpses_list = []
    for field in receptive_fields:
        # Extract a glimpse with the given shape and scale.
        images_glimpse = utils.extract_glimpse(
            images, size=field, offsets=locations)
        # Bigger receptive fields have lower resolution.
        images_glimpse = tf.image.resize_images(
            images_glimpse, size=self.glimpse_shape)
        # Optionally stop gradients through the glimpse.
        if self.apply_stop_gradient:
            images_glimpse = tf.stop_gradient(images_glimpse)
        images_glimpses_list.append(images_glimpse)
    return images_glimpses_list
def td_learning(v_tm1, r_t, pcont_t, v_t, name="TDLearning"):
    """Implements the TD(0)-learning loss as a TensorFlow op.

    The TD loss is `0.5` times the squared difference between `v_tm1` and
    the target `r_t + pcont_t * v_t`.

    See "Learning to Predict by the Methods of Temporal Differences" by Sutton.
    (https://link.springer.com/article/10.1023/A:1022633531479).

    Args:
        v_tm1: Tensor holding values at previous timestep, shape `[B]`.
        r_t: Tensor holding rewards, shape `[B]`.
        pcont_t: Tensor holding pcontinue values, shape `[B]`.
        v_t: Tensor holding values at current timestep, shape `[B]`.
        name: name to prefix ops created by this function.

    Returns:
        A namedtuple with fields:
        * `loss`: a tensor containing the batch of losses, shape `[B]`.
        * `extra`: a namedtuple with fields:
            * `target`: batch of target values for `v_tm1`, shape `[B]`.
            * `td_error`: batch of temporal difference errors, shape `[B]`.
    """
    # Rank and compatibility checks.
    base_ops.wrap_rank_shape_assert([[v_tm1, v_t, r_t, pcont_t]], [1], name)

    # TD(0)-learning op.
    with tf.name_scope(name, values=[v_tm1, r_t, pcont_t, v_t]):
        # Build target.
        target = tf.stop_gradient(r_t + pcont_t * v_t)

        # Temporal difference error and loss.
        # Loss is MSE scaled by 0.5, so the gradient is equal to the TD error.
        td_error = target - v_tm1
        loss = 0.5 * tf.square(td_error)
        return base_ops.LossOutput(loss, TDExtra(target, td_error))
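# Illustration (not from the original source): a tiny numeric check of the TD(0)
# target, error, and loss defined above, in plain Python so it stands alone.
# With r_t = 1.0, pcont_t = 0.9, v_t = 2.0 and v_tm1 = 2.5:
#   target   = 1.0 + 0.9 * 2.0 = 2.8
#   td_error = 2.8 - 2.5       = 0.3
#   loss     = 0.5 * 0.3 ** 2  = 0.045
# Because the target is wrapped in stop_gradient, d(loss)/d(v_tm1) = -td_error.
assert abs(0.5 * ((1.0 + 0.9 * 2.0) - 2.5) ** 2 - 0.045) < 1e-12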
def __init__(self, sess, state_dim, action_dim, learning_rate):
    self._entropy = 5.
    self.quality = 0
    self.s_dim = state_dim
    self.a_dim = action_dim
    self.lr_rate = learning_rate
    self.sess = sess

    self.R = tf.placeholder(tf.float32, [None, 1])
    self.inputs = tf.placeholder(
        tf.float32, [None, self.s_dim[0], self.s_dim[1]])
    self.old_pi = tf.placeholder(tf.float32, [None, self.a_dim])
    self.acts = tf.placeholder(tf.float32, [None, self.a_dim])
    self.entropy_weight = tf.placeholder(tf.float32)

    self.pi, self.val = self.CreateNetwork(inputs=self.inputs)
    self.real_out = tf.clip_by_value(self.pi, ACTION_EPS, 1. - ACTION_EPS)
    self.log_prob = tf.log(
        tf.reduce_sum(
            tf.multiply(self.real_out, self.acts),
            reduction_indices=1,
            keepdims=True))
    self.entropy = tf.multiply(self.real_out, tf.log(self.real_out))

    self.adv = tf.stop_gradient(self.R - self.val)
    self.a2closs = self.log_prob * self.adv

    # Get all network parameters.
    self.network_params = \
        tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='actor')

    # Set all network parameters.
    self.input_network_params = []
    for param in self.network_params:
        self.input_network_params.append(
            tf.placeholder(tf.float32, shape=param.get_shape()))
    self.set_network_params_op = []
    for idx, param in enumerate(self.input_network_params):
        self.set_network_params_op.append(
            self.network_params[idx].assign(param))

    self.loss = - tf.reduce_sum(self.a2closs) \
        + self.entropy_weight * tf.reduce_sum(self.entropy)

    self.optimize = tf.train.AdamOptimizer(self.lr_rate).minimize(self.loss)
    self.val_loss = tflearn.mean_square(self.val, self.R)
    self.val_opt = tf.train.AdamOptimizer(
        self.lr_rate * 10.).minimize(self.val_loss)
def compute_loss(self, done, new_state, memory, gamma=0.99):
    if done:
        reward_sum = 0.  # terminal
    else:
        reward_sum = self.local_model(
            tf.convert_to_tensor(new_state[None, :],
                                 dtype=tf.float32))[-1].numpy()[0]

    # Get discounted rewards.
    discounted_rewards = []
    for reward in memory.rewards[::-1]:  # reverse buffer r
        reward_sum = reward + gamma * reward_sum
        discounted_rewards.append(reward_sum)
    discounted_rewards.reverse()

    logits, values = self.local_model(
        tf.convert_to_tensor(np.vstack(memory.states), dtype=tf.float32))
    # Get our advantages.
    advantage = tf.convert_to_tensor(
        np.array(discounted_rewards)[:, None], dtype=tf.float32) - values
    # Value loss.
    value_loss = advantage ** 2

    # Calculate our policy loss.
    policy = tf.nn.softmax(logits)
    entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=policy, logits=logits)

    policy_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=memory.actions, logits=logits)
    policy_loss *= tf.stop_gradient(advantage)
    policy_loss -= 0.01 * entropy
    total_loss = tf.reduce_mean((0.5 * value_loss + policy_loss))
    return total_loss
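# Illustration (not from the original source): the reversed loop above computes
# discounted returns G_t = r_t + gamma * G_{t+1}, bootstrapped from the value of
# new_state when the episode is not done. Plain-Python check with gamma = 0.5
# and a bootstrap value of 4.0:
rewards_ex, bootstrap, gamma_ex = [1.0, 2.0, 3.0], 4.0, 0.5
g = bootstrap
returns_ex = []
for r_ex in rewards_ex[::-1]:
    g = r_ex + gamma_ex * g
    returns_ex.append(g)
returns_ex.reverse()
assert returns_ex == [3.25, 4.5, 5.0]  # 5.0 = 3 + 0.5*4, 4.5 = 2 + 0.5*5, ...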
def train(self, X, lr=0.01, batch_size=64, epochs=5, modelname='vh_weights'):
    with self.graph.as_default():
        print(self.bv.shape)
        tf_v = tf.placeholder(tf.float32, [batch_size, self.bv.shape[0]])
        v = tf.round(tf_v)
        vk = tf.identity(v)

        i = tf.constant(0)
        _, _, vk = tf.while_loop(
            cond=lambda i, k, *args: i <= k,
            body=self.gibbs_step,
            loop_vars=[i, tf.constant(self.cd_steps), vk],
            parallel_iterations=1,
            back_prop=False)

        vk = tf.stop_gradient(vk)
        loss = self.energy(v) - self.energy(vk)
        optimizer = tf.train.AdamOptimizer(lr).minimize(loss)
        init = tf.global_variables_initializer()
        saver = tf.train.Saver([self.W])

        with tf.Session(graph=self.graph) as sess:
            init.run()
            for epoch in range(epochs):
                losses = []
                for i in range(0, len(X) - batch_size, batch_size):
                    x_batch = X[i:i + batch_size]
                    l, _ = sess.run([loss, optimizer],
                                    feed_dict={tf_v: x_batch})
                    losses.append(l)
                print('Epoch Cost %d: ' % (epoch), np.mean(losses))
            self.modelW = self.W.eval()
            saver.save(sess, modelname + str(self.nh))
def _build_q_loss(self, batch):
    s1 = batch['s1']
    s2 = batch['s2']
    a1 = batch['a1']
    r = batch['r']
    dsc = batch['dsc']
    _, a2_p, log_pi_a2_p = self._p_fn(s2)

    q2_targets = []
    q1_preds = []
    for q_fn, q_fn_target in self._q_fns:
        q2_target_ = q_fn_target(s2, a2_p)
        q1_pred = q_fn(s1, a1)
        q1_preds.append(q1_pred)
        q2_targets.append(q2_target_)
    q2_targets = tf.stack(q2_targets, axis=-1)
    q2_target = self._ensemble_q2_target(q2_targets)
    v2_target = q2_target - self._get_alpha_entropy() * log_pi_a2_p

    if self._value_penalty:
        div_estimate = self._div_estimate(s2)
        v2_target = v2_target - self._get_alpha() * div_estimate
    q1_target = tf.stop_gradient(r + dsc * self._discount * v2_target)

    q_losses = []
    for q1_pred in q1_preds:
        q_loss_ = tf.reduce_mean(tf.square(q1_pred - q1_target))
        q_losses.append(q_loss_)
    q_loss = tf.add_n(q_losses)
    q_w_norm = self._get_q_weight_norm()
    norm_loss = self._weight_decays[0] * q_w_norm
    loss = q_loss + norm_loss

    # info
    info = collections.OrderedDict()
    info['q_loss'] = q_loss
    info['q_norm'] = q_w_norm
    info['r_mean'] = tf.reduce_mean(r)
    info['dsc_mean'] = tf.reduce_mean(dsc)
    info['q2_target_mean'] = tf.reduce_mean(q2_target)
    info['q1_target_mean'] = tf.reduce_mean(q1_target)

    return loss, info
def shake_shake_branch(x, output_filters, stride, rand_forward, rand_backward,
                       hparams):
    """Builds one branch of a two-branch shake-shake block."""
    is_training = hparams.mode == tf.estimator.ModeKeys.TRAIN
    x = tf.nn.relu(x)
    x = tf.layers.conv2d(
        x, output_filters, (3, 3), strides=(stride, stride),
        padding="SAME", name="conv1")
    x = tf.layers.batch_normalization(x, training=is_training, name="bn1")
    x = tf.nn.relu(x)
    x = tf.layers.conv2d(
        x, output_filters, (3, 3), padding="SAME", name="conv2")
    x = tf.layers.batch_normalization(x, training=is_training, name="bn2")
    if is_training:
        x = x * rand_backward + tf.stop_gradient(x * rand_forward -
                                                 x * rand_backward)
    else:
        x *= 1.0 / hparams.shake_shake_num_branches
    return x
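# Illustration (not from the original source): in the training branch above, the
# expression x * rand_backward + stop_gradient(x * rand_forward - x * rand_backward)
# equals x * rand_forward in the forward pass, while its gradient w.r.t. x is
# rand_backward, i.e. different random convex weights are used for the forward and
# backward passes (the shake-shake trick). Forward-pass identity, in plain Python:
x_val, rand_fwd, rand_bwd = 2.0, 0.3, 0.7
assert abs((x_val * rand_bwd + (x_val * rand_fwd - x_val * rand_bwd)) -
           x_val * rand_fwd) < 1e-12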
def sqrt_sparsity(motion_map):
    """A regularizer that encourages sparsity.

    This regularizer penalizes nonzero values. Close to zero it behaves like an
    L1 regularizer, and far away from zero its strength decreases. The scale
    that distinguishes "close" from "far" is the mean absolute value of
    `motion_map`.

    Args:
        motion_map: A tf.Tensor of shape [B, H, W, C].

    Returns:
        A scalar tf.Tensor, the regularizer to be added to the training loss.
    """
    with tf.name_scope('drift'):
        tensor_abs = tf.abs(motion_map)
        mean = tf.stop_gradient(
            tf.reduce_mean(tensor_abs, axis=[1, 2], keep_dims=True))
        # We use an L0.5 norm here because it encourages sparsity more strongly
        # than L1. The coefficients are chosen so that the norm asymptotes to
        # L1 in the small-value limit.
        return tf.reduce_mean(
            2 * mean * tf.sqrt(tensor_abs / (mean + 1e-24) + 1))
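# Illustration (not from the original source): with m = |motion_map| and
# mu = mean(m) held constant by stop_gradient, the per-element penalty above is
#   2 * mu * sqrt(m / mu + 1),
# which behaves like 2 * mu + m (L1-like, unit slope) for m << mu and like
# 2 * sqrt(mu * m) (an L0.5-style penalty) for m >> mu, matching the docstring.
import numpy as np

_mu = 1.0
_m = np.array([1e-3, 1e3])
_penalty = 2 * _mu * np.sqrt(_m / _mu + 1)
assert np.isclose(_penalty[0], 2 * _mu + _m[0], rtol=1e-3)          # small-value limit
assert np.isclose(_penalty[1], 2 * np.sqrt(_mu * _m[1]), rtol=1e-3)  # large-value limit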
def bit_to_int(self, x_bit, num_bits, base=2):
    """Turn x_bit representing numbers bitwise (lower-endian) to int tensor.

    Args:
        x_bit: Tensor containing numbers in a particular base to be converted
            to int.
        num_bits: Number of bits in the representation.
        base: Base of the representation.

    Returns:
        Integer representation of this number.
    """
    x_l = tf.stop_gradient(tf.to_int32(tf.reshape(x_bit, [-1, num_bits])))
    # pylint: disable=g-complex-comprehension
    x_labels = [
        x_l[:, i] * tf.to_int32(base)**tf.to_int32(i) for i in range(num_bits)
    ]
    res = sum(x_labels)
    return tf.to_int32(
        tf.reshape(res, common_layers.shape_list(x_bit)[:-1]))
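# Illustration (not from the original source): a plain-Python reference for the
# lower-endian conversion implemented above; bit i contributes x_bit[i] * base**i.
def _bit_to_int_reference(bits, base=2):
    return sum(int(b) * base**i for i, b in enumerate(bits))

assert _bit_to_int_reference([1, 0, 1]) == 5  # 1*1 + 0*2 + 1*4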
def pixels_from_softmax(frame_logits, pure_sampling=False,
                        temperature=1.0, gumbel_noise_factor=0.2):
    """Given frame_logits from a per-pixel softmax, generate colors."""
    # If we're purely sampling, just sample each pixel.
    if pure_sampling or temperature == 0.0:
        return common_layers.sample_with_temperature(frame_logits, temperature)

    # Gumbel-sample from the pixel softmax and average by pixel values.
    pixel_range = tf.to_float(tf.range(256))
    for _ in range(len(frame_logits.get_shape().as_list()) - 1):
        pixel_range = tf.expand_dims(pixel_range, axis=0)

    frame_logits = tf.nn.log_softmax(frame_logits)
    gumbel_samples = discretization.gumbel_sample(
        common_layers.shape_list(frame_logits)) * gumbel_noise_factor

    frame = tf.nn.softmax(
        (frame_logits + gumbel_samples) / temperature, axis=-1)
    result = tf.reduce_sum(frame * pixel_range, axis=-1)
    # Round on the forward pass, not on the backward one.
    return result + tf.stop_gradient(tf.round(result) - result)
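# Illustration (not from the original source): the final line is a straight-through
# rounding estimator. The returned value equals tf.round(result) in the forward pass,
# but the stop_gradient term contributes no gradient, so backprop sees the unrounded
# `result`. Forward-pass identity, in plain Python:
_r = 2.3
assert abs((_r + (round(_r) - _r)) - round(_r)) < 1e-12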
def make_graph():
    uniform = tfp.distributions.Uniform()
    input_tensor = uniform.sample((batch_size_tensor, target_fun.n_dims))
    target_tensor = target_fun.eval_tf(input_tensor)

    current_tensor = encoding(input_tensor, False, "encoding")
    for i in range(config["network"]["n_hidden_layers"]):
        current_tensor = linear_layer(
            current_tensor, config["network"]["n_neurons"], tf.float16,
            f"fc{i}", False)
        current_tensor = activation(current_tensor,
                                    config["network"]["activation"])

    output_tensor = linear_layer(
        current_tensor, target_fun.n_channels, tf.float16, "fc_out", False)
    output_tensor = activation(output_tensor,
                               config["network"]["output_activation"])

    relative_l2_error = (target_tensor - output_tensor)**2 / (
        tf.stop_gradient(output_tensor)**2 + 0.01)
    loss = tf.math.reduce_mean(relative_l2_error)

    LOSS_SCALE = 128
    variables = tf.trainable_variables()
    gradients, _ = compute_gradients(loss, variables, LOSS_SCALE)
    train_op, _ = get_train_op(config, variables, gradients, optimizer)

    return train_op, loss, input_tensor, output_tensor
def cw_attack(sess, x, logits, n_ae, final=False):
    cw_attack_obj = CarliniWagnerL2(logits, sess=sess, back='tf')
    if final:
        cw_params = {
            'binary_search_steps': 9,
            'max_iterations': 2000,
            'learning_rate': 0.01,
            'initial_const': 1.0,
            'abort_early': True,
            'batch_size': n_ae
        }
    else:
        cw_params = {
            'binary_search_steps': 5,
            'max_iterations': 500,
            'learning_rate': 0.01,
            'initial_const': 1.0,
            # Need to specify, since CarliniWagnerL2 is not completely symbolic.
            'batch_size': n_ae
        }
    adv_ex_tensor = cw_attack_obj.generate(x, **cw_params)
    adv_ex_tensor = tf.stop_gradient(adv_ex_tensor)
    return adv_ex_tensor
def _get_fake_data(self, inputs, mlm_logits):
    """Sample from the generator to create corrupted input."""
    inputs = pretrain_helpers.unmask(inputs)
    disallow = tf.one_hot(
        inputs.masked_lm_ids, depth=self._bert_config.vocab_size,
        dtype=tf.float32) if self._config.disallow_correct else None
    sampled_tokens = tf.stop_gradient(
        pretrain_helpers.sample_from_softmax(
            mlm_logits / self._config.temperature, disallow=disallow))
    sampled_tokids = tf.argmax(sampled_tokens, -1, output_type=tf.int32)
    updated_input_ids, masked = pretrain_helpers.scatter_update(
        inputs.input_ids, sampled_tokids, inputs.masked_lm_positions)
    labels = masked * (1 - tf.cast(
        tf.equal(updated_input_ids, inputs.input_ids), tf.int32))
    updated_inputs = pretrain_data.get_updated_inputs(
        inputs, input_ids=updated_input_ids)
    FakedData = collections.namedtuple(
        "FakedData", ["inputs", "is_fake_tokens", "sampled_tokens"])
    return FakedData(inputs=updated_inputs, is_fake_tokens=labels,
                     sampled_tokens=sampled_tokens)
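# Illustration (not from the original source): the `labels` above mark a masked
# position as fake (1) only when the sampled token differs from the original token.
# Plain-Python equivalent on a toy example:
_masked_positions = [1, 1, 0]     # 1 = this position was masked
_original_ids     = [7, 9, 4]
_updated_ids      = [7, 3, 4]     # generator kept 7, replaced 9 with 3
_labels = [m * (0 if u == o else 1)
           for m, o, u in zip(_masked_positions, _original_ids, _updated_ids)]
assert _labels == [0, 1, 0]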
def build_trainer(self, child_model):
    """Build the train ops by connecting Controller with a Child."""
    # Actor: use the child model's validation loss to form the reward.
    self.valid_loss = tf.to_float(child_model.rl_loss)
    self.valid_loss = tf.stop_gradient(self.valid_loss)
    self.valid_ppl = tf.exp(self.valid_loss)
    self.reward = REWARD_CONSTANT / self.valid_ppl

    if self.params.controller_entropy_weight:
        self.reward += (self.params.controller_entropy_weight *
                        self.sample_entropy)

    # Moving-average baseline.
    self.sample_log_probs = tf.reduce_sum(self.sample_log_probs)
    self.baseline = tf.Variable(0.0, dtype=tf.float32, trainable=False)
    baseline_update = tf.assign_sub(
        self.baseline,
        ((1 - self.params.controller_baseline_dec) *
         (self.baseline - self.reward)))

    with tf.control_dependencies([baseline_update]):
        self.reward = tf.identity(self.reward)
    self.loss = self.sample_log_probs * (self.reward - self.baseline)

    self.train_step = tf.Variable(
        0, dtype=tf.int32, trainable=False, name='train_step')
    tf_vars = [
        var for var in tf.trainable_variables()
        if var.name.startswith(self.name)
    ]

    self.train_op, self.optimizer, self.grad_norm = _build_train_op(
        loss=self.loss,
        tf_vars=tf_vars,
        learning_rate=self.params.controller_learning_rate,
        train_step=self.train_step,
        num_aggregate=self.params.controller_num_aggregate)
def mine(x, z, name_net="estimator_network"):
    """Computes I(X, Z).

    Uses the algorithm in "Mutual Information Neural Estimation"
    (https://arxiv.org/pdf/1801.04062.pdf).

    Args:
        x: Samples from x [batch_size, size_x].
        z: Samples from z [batch_size, size_z].
        name_net: Scope for the variables forming the network.

    Returns:
        Estimate of the mutual information and the update op for the optimizer.
    """
    z_shuffled = vae.shuffle_codes(z)

    concat_x_x = tf.concat([x, x], axis=0)
    concat_z_z_shuffled = tf.stop_gradient(tf.concat([z, z_shuffled], axis=0))

    with tf.variable_scope(name_net, reuse=tf.AUTO_REUSE):
        d1_x = tf.layers.dense(concat_x_x, 20, name="d1_x")
        d1_z = tf.layers.dense(concat_z_z_shuffled, 20, name="d1_z")
        d1 = tf.nn.elu(d1_x + d1_z, name="d1")
        d2 = tf.layers.dense(d1, 1, name="d2")

    batch_size = tf.shape(x)[0]
    pred_x_z = d2[:batch_size]
    pred_x_z_shuffled = d2[batch_size:]
    loss = -(tf.reduce_mean(pred_x_z, axis=0) +
             tf.math.log(tf.to_float(batch_size)) -
             tf.math.reduce_logsumexp(pred_x_z_shuffled))

    all_variables = tf.trainable_variables()
    mine_vars = [
        var for var in all_variables if "estimator_network" in var.name
    ]
    mine_op = tf.train.AdamOptimizer(learning_rate=0.01).minimize(
        loss=loss, var_list=mine_vars)
    return -loss, mine_op
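# Illustration (not from the original source): the returned value -loss above is the
# Donsker-Varadhan lower bound mean(T(x, z)) - log(mean(exp(T(x, z_shuffled)))),
# since logsumexp(s) - log(B) = log(mean(exp(s))). Identity check in plain Python:
import math

_scores = [0.1, -0.4, 0.7]
_lse = math.log(sum(math.exp(s) for s in _scores))
assert abs((_lse - math.log(len(_scores))) -
           math.log(sum(math.exp(s) for s in _scores) / len(_scores))) < 1e-12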
def build_new_parameters(
    loss: tf.Tensor,
    parameters: Dict[str, tf.Tensor],
    optimizer: tf.train.Optimizer,
    first_order: bool = False,
):
    """Builds new parameters via an optimization step on the provided loss.

    Parameters
    ----------
    loss : <float32> [] tensor
        A scalar tensor that represents the loss.
    parameters : dict of variables or tensors
        A dictionary of initial parameters.
    optimizer : Optimizer
        An optimizer used for computing parameter updates.
    first_order : bool, optional (default: False)
        If True, gradients of the parameters computed by the optimizer are
        added to the graph as constants. This zeroes out the second-order
        terms under subsequent differentiation.

    Returns
    -------
    new_parameters : dict of tensors
        A dictionary of updated parameters.
    """
    param_names, param_values = zip(*parameters.items())
    grads_and_vars = optimizer.compute_gradients(loss, param_values)
    # Prevent backprop through the gradients, if necessary.
    if first_order:
        grads_and_vars = [(tf.stop_gradient(g), v) for g, v in grads_and_vars]
    new_parameters = dict(
        zip(param_names, optimizer.compute_updates(grads_and_vars)))
    return new_parameters
def inner(*args, **kwargs):
    result = tf.stop_gradient(f(*args, **kwargs))
    scope = tf.get_default_graph().get_name_scope()

    def grad(dresult, variables=None):
        with tf.GradientTape() as t:
            t.watch(args)
            if variables is not None:
                t.watch(variables)
            # We need to outsmart XLA here to force a control dependency.
            zero_with_control_dependency = tf.reduce_mean(dresult[0] * 1e-30)
            new_args = []
            for a in args:
                if a.dtype.is_floating:
                    new_args.append(
                        a + tf.cast(zero_with_control_dependency, a.dtype))
                else:
                    new_args.append(a)
            with tf.control_dependencies([dresult]):
                if bf16:
                    with tf.tpu.bfloat16_scope():
                        with tf.variable_scope(scope, reuse=True):
                            result = f(*new_args, **kwargs)
                else:
                    with tf.variable_scope(scope, reuse=True):
                        result = f(*new_args, **kwargs)
        kw_vars = []
        if variables is not None:
            kw_vars = list(variables)
        grads = t.gradient(result, list(new_args) + kw_vars,
                           output_gradients=[dresult])
        return grads[:len(new_args)], grads[len(new_args):]

    return result, grad
def _coordinates_to_heatmap_sparse(y_grid, x_grid, y_coordinates, x_coordinates,
                                   sigma, channel_onehot, channel_weights=None):
    """Sparse version of coordinates to heatmap using tf.scatter."""
    if not hasattr(tf, 'tensor_scatter_nd_max'):
        raise RuntimeError(
            ('Please upgrade tensorflow to use `tensor_scatter_nd_max` or set '
             'compute_heatmap_sparse=False'))
    _, num_channels = (
        shape_utils.combined_static_and_dynamic_shape(channel_onehot))

    height, width = shape_utils.combined_static_and_dynamic_shape(y_grid)
    x_grid = tf.expand_dims(x_grid, 2)
    y_grid = tf.expand_dims(y_grid, 2)
    # The raw center coordinates in the output space.
    x_diff = x_grid - tf.math.floor(x_coordinates)
    y_diff = y_grid - tf.math.floor(y_coordinates)
    squared_distance = x_diff**2 + y_diff**2

    gaussian_map = tf.exp(-squared_distance / (2 * sigma * sigma))

    if channel_weights is not None:
        gaussian_map = gaussian_map * channel_weights[tf.newaxis, tf.newaxis, :]

    channel_indices = tf.argmax(channel_onehot, axis=1)
    channel_indices = channel_indices[:, tf.newaxis]
    heatmap_init = tf.zeros((num_channels, height, width))

    gaussian_map = tf.transpose(gaussian_map, (2, 0, 1))
    heatmap = tf.tensor_scatter_nd_max(
        heatmap_init, channel_indices, gaussian_map)

    # The maximum of an empty tensor is -inf; the following avoids that.
    heatmap = tf.maximum(heatmap, 0)

    return tf.stop_gradient(tf.transpose(heatmap, (1, 2, 0)))
def process_single_frame(prev_outputs, inputs):
    """Process a single frame of the video."""
    cur_image, input_reward, action = inputs
    time_step, prev_image, prev_reward, frame_buf, lstm_states = prev_outputs

    # Sample from softmax (by argmax). This is a no-op for non-softmax loss.
    prev_image = self.get_sampled_frame(prev_image)

    generated_items = [prev_image]
    groundtruth_items = [cur_image]
    done_warm_start = tf.greater(time_step, context_frames - 1)
    input_image, = self.get_scheduled_sample_inputs(
        done_warm_start, groundtruth_items, generated_items, ss_func)

    # Prediction.
    pred_image, lstm_states, _ = self.construct_predictive_tower(
        input_image, None, action, lstm_states, latent)

    if self.hparams.reward_prediction:
        reward_input_image = self.get_sampled_frame(pred_image)
        if self.hparams.reward_prediction_stop_gradient:
            reward_input_image = tf.stop_gradient(reward_input_image)
        with tf.control_dependencies([time_step]):
            frame_buf = [reward_input_image] + frame_buf[:-1]
        pred_reward = self.reward_prediction(frame_buf, None, action, latent)
        pred_reward = common_video.decode_to_shape(
            pred_reward, common_layers.shape_list(input_reward), "reward_dec")
    else:
        pred_reward = prev_reward

    time_step += 1
    outputs = (time_step, pred_image, pred_reward, frame_buf, lstm_states)
    return outputs
def get_disc_loss(args, x, x_fake, score_func, z_outer, neg_kl_outer):
    opt_disc = tf.train.AdamOptimizer(
        learning_rate=args.learning_rate, beta1=args.beta1, beta2=args.beta2)

    fx = score_func(x, z_outer)
    f_fake_x = score_func(x_fake, z_outer)
    f_loss = tf.reduce_mean(-fx) + tf.reduce_mean(f_fake_x)
    loss = f_loss + tf.reduce_mean(-neg_kl_outer)

    if args.gp_lambda > 0:  # add gradient penalty
        alpha = tf.random.uniform(shape=(tf.shape(x)[0], 1, 1))
        x_hat = alpha * x + (1 - alpha) * x_fake
        d_hat = score_func(x_hat, tf.stop_gradient(z_outer))
        ddx = tf.gradients(d_hat, x_hat)[0]
        ddx = tf.sqrt(tf.reduce_sum(tf.square(ddx), axis=[1, 2]))
        ddx = tf.reduce_mean(tf.square(ddx - 1.0)) * args.gp_lambda
        loss = loss + ddx

    gvs = opt_disc.compute_gradients(
        loss, var_list=tf.trainable_variables(scope='score_func'))
    gvs = [(tf.where(tf.is_nan(grad), tf.zeros_like(grad), grad), val)
           for grad, val in gvs if grad is not None]
    train_disc = opt_disc.apply_gradients(gvs)
    return f_loss, train_disc
def __call__(self, inputs, state, scope=None):
    """Run the cell with the declared dropouts."""
    orig_inputs = inputs
    if self._input_keep_prob < 1:
        # When using functional_rnn, the forward pass is recomputed during
        # backprop, so the dropout layer must be deterministic between the
        # forward and backward passes. We therefore use stateless random ops so
        # the generated random numbers are deterministic for a given seed. We
        # also want the dropout to vary across global steps and time steps, so
        # both are mixed into the seeds.
        seeds = tf.stop_gradient(
            tf.stack([
                tf.cast(self._global_step, tf.int32) + tf.reduce_sum(state[1]),
                int(hashlib.md5(
                    inputs.name.encode("utf-8")).hexdigest()[:8], 16) &
                0x7FFFFFFF
            ]))
        keep_prob = tf.convert_to_tensor(
            self._input_keep_prob, dtype=tf.float32, name="keep_prob")
        random_tensor = keep_prob + contrib_stateless.stateless_random_uniform(
            inputs.get_shape(), seed=tf.cast(seeds, tf.int32),
            dtype=tf.float32)
        binary_tensor = tf.cast(tf.floor(random_tensor), inputs.dtype)
        inputs = tf.div(inputs,
                        tf.cast(keep_prob, inputs.dtype)) * binary_tensor

    with tf.variable_scope("cell_fn", reuse=tf.AUTO_REUSE):
        state0 = state[0] if self._enable else state
        output, new_state = self._cell(inputs, state0, scope=scope)

    if self._seq_len is not None and self._enable:
        seq_len = tf.reshape(self._seq_len, [-1])
        padding = tf.reshape(state[1], [-1]) < (
            tf.reduce_max(seq_len) - seq_len)
        output = tf.where(padding, orig_inputs, output)
        new_state = contrib_rnn.LSTMStateTuple(
            tf.where(padding, state[0].c, new_state.c),
            tf.where(padding, state[0].h, new_state.h))

    if self._enable:
        return output, [new_state, state[1] + 1]
    else:
        return output, new_state
def _build(self, inputs):
    (shared_inputs, extra_policy_inputs) = inputs
    policy_in = tf.concat([shared_inputs, extra_policy_inputs], axis=1)

    policy = snt.nets.MLP(
        output_sizes=self._policy_layers,
        activation=self._activation,
        name='policy_mlp')(policy_in)

    # Sample an action from the policy logits.
    action = tf.multinomial(policy, num_samples=1, output_dtype=tf.int32)
    action = tf.squeeze(action, 1)  # [B, 1] -> [B]

    if self._policy_clip_abs_value > 0:
        policy = snt.clip_gradient(
            net=policy,
            clip_value_min=-self._policy_clip_abs_value,
            clip_value_max=self._policy_clip_abs_value)

    baseline_in = tf.concat(
        [shared_inputs, tf.stop_gradient(policy)], axis=1)
    baseline = snt.nets.MLP(
        self._baseline_layers,
        activation=self._activation,
        name='baseline_mlp')(baseline_in)
    baseline = tf.squeeze(baseline, axis=-1)  # [B, 1] -> [B]

    if self._policy_clip_abs_value > 0:
        baseline = snt.clip_gradient(
            net=baseline,
            clip_value_min=-self._policy_clip_abs_value,
            clip_value_max=self._policy_clip_abs_value)

    outputs = PolicyOutputs(
        policy=policy,
        action=action,
        baseline=baseline)
    return outputs
def _crop_pool_layer(self, bottom, rois, name):
    with tf.variable_scope(name):
        batch_ids = tf.squeeze(
            tf.slice(rois, [0, 0], [-1, 1], name="batch_id"), [1])
        # Get the normalized coordinates of the bounding boxes.
        bottom_shape = tf.shape(bottom)
        height = (tf.to_float(bottom_shape[1]) - 1.) * np.float32(
            self._feat_stride[0])
        width = (tf.to_float(bottom_shape[2]) - 1.) * np.float32(
            self._feat_stride[0])
        x1 = tf.slice(rois, [0, 1], [-1, 1], name="x1") / width
        y1 = tf.slice(rois, [0, 2], [-1, 1], name="y1") / height
        x2 = tf.slice(rois, [0, 3], [-1, 1], name="x2") / width
        y2 = tf.slice(rois, [0, 4], [-1, 1], name="y2") / height
        # Gradients wouldn't be backpropagated to the rois anyway; stopping
        # them explicitly just saves time.
        bboxes = tf.stop_gradient(tf.concat([y1, x1, y2, x2], axis=1))
        pre_pool_size = cfg.FLAGS.roi_pooling_size * 2
        crops = tf.image.crop_and_resize(
            bottom, bboxes, tf.to_int32(batch_ids),
            [pre_pool_size, pre_pool_size], name="crops")

    return slim.max_pool2d(crops, [2, 2], padding='SAME')
def conditional_linear_classifier(self, inputs, n_classes, probs,
                                  add_bias=True):
    """"""
    input_shape = tf.shape(inputs)
    batch_size = input_shape[0]
    bucket_size = input_shape[1]
    input_size = inputs.get_shape().as_list()[-1]

    if len(probs.get_shape().as_list()) == 2:
        probs = tf.to_float(
            tf.one_hot(tf.to_int64(probs), bucket_size, 1, 0))
    else:
        probs = tf.stop_gradient(probs)

    if self.moving_params is None:
        keep_prob = self.mlp_keep_prob
    else:
        keep_prob = 1
    if isinstance(keep_prob, tf.Tensor) or keep_prob < 1:
        noise_shape = tf.stack([batch_size, 1, 1, input_size])
        inputs = tf.nn.dropout(inputs, keep_prob, noise_shape=noise_shape)

    lin = linalg.linear(inputs, n_classes, add_bias=add_bias,
                        initializer=tf.zeros_initializer,
                        moving_params=self.moving_params)
    weighted_lin = tf.batch_matmul(lin, tf.expand_dims(probs, 3), adj_x=True)

    return weighted_lin, lin
def __init__(self, auxiliary_task, predict_from_pixels, feat_dim=None,
             scope='dynamics'):
    self.scope = scope
    self.auxiliary_task = auxiliary_task
    self.hidsize = self.auxiliary_task.hidsize
    self.feat_dim = feat_dim
    self.obs = self.auxiliary_task.obs
    self.last_ob = self.auxiliary_task.last_ob
    self.ac = self.auxiliary_task.ac
    self.ac_space = self.auxiliary_task.ac_space
    self.ob_mean = self.auxiliary_task.ob_mean
    self.ob_std = self.auxiliary_task.ob_std

    #############################################
    # Modified section
    #############################################
    if predict_from_pixels:
        self.features = self.get_features(self.obs, reuse=False)
    else:
        self.features = tf.stop_gradient(self.auxiliary_task.features)

    self.out_features = self.auxiliary_task.next_features

    with tf.variable_scope(self.scope + "_loss"):
        self.loss = self.get_loss()
def pgd_attack(clean_inputs, clean_labels, logits, p_norm, eps, pgd_n_iter):
    """Symbolic definition of the PGD attack."""
    attack = MadryEtAl(logits)
    attack_params = {
        'nb_iter': pgd_n_iter,
        'clip_min': 0.0,
        'clip_max': 1.0,
        'y': clean_labels,
        'ord': p_norm,
        'eps': eps
    }
    if p_norm == np.inf:
        attack_params['eps_iter'] = attack_params['eps'] / pgd_n_iter * 2
        attack_params['pgd_update'] = 'sign'
    elif p_norm == 2:
        attack_params['eps_iter'] = attack_params['eps'] / pgd_n_iter * 2
        attack_params['pgd_update'] = 'plain'
    else:
        raise Exception('Wrong p_norm.')

    adv_ex_tensor = attack.generate(clean_inputs, **attack_params)
    adv_ex_tensor = tf.stop_gradient(adv_ex_tensor)
    return adv_ex_tensor
def reinforce_loss_discrete(classification_logits_t, classification_labels_t,
                            locations_logits_t, locations_labels_t,
                            use_punishment=False):
    """Computes the REINFORCE loss for discrete location action spaces.

    Args:
        classification_logits_t: List of classification logits at each time
            point.
        classification_labels_t: List of classification labels at each time
            point.
        locations_logits_t: List of location logits at each time point.
        locations_labels_t: List of location labels at each time point.
        use_punishment: (Boolean) Rewards are in {-1, 1} if True, else {0, 1}.

    Returns:
        reinforce_loss: REINFORCE loss.
    """
    classification_logits = tf.concat(classification_logits_t, axis=0)
    classification_labels = tf.concat(classification_labels_t, axis=0)
    locations_logits = tf.concat(locations_logits_t, axis=0)
    locations_labels = tf.concat(locations_labels_t, axis=0)

    rewards = tf.cast(
        tf.equal(
            tf.argmax(classification_logits, axis=1,
                      output_type=classification_labels.dtype),
            classification_labels),
        dtype=tf.float32)  # size (batch_size) each
    if use_punishment:
        # Rewards are in {-1, 1} instead of {0, 1}.
        rewards = 2. * rewards - 1.
    neg_advs = tf.stop_gradient(rewards - tf.reduce_mean(rewards))
    log_prob = -tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=locations_logits, labels=locations_labels)
    loss = -tf.reduce_mean(neg_advs * log_prob)
    return loss
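# Illustration (not from the original source): the loss above is the standard
# REINFORCE estimator with a mean-reward baseline,
#   loss = -mean[(reward - mean(reward)) * log pi(location)],
# where the centred rewards are wrapped in stop_gradient so gradients flow only
# through the location log-probabilities. Centring in plain Python:
_rewards_example = [1.0, 0.0, 1.0, 1.0]          # 1 = correct classification
_baseline = sum(_rewards_example) / len(_rewards_example)
_advantages = [r - _baseline for r in _rewards_example]
assert abs(sum(_advantages)) < 1e-12             # centred advantages sum to zero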
def mode(self, straight_through_gradient=False, maximum=False):
    """Mode of the distribution.

    Args:
        straight_through_gradient: Boolean; if True, it uses the
            straight-through gradient estimator for the mode. Otherwise there
            is no gradient with respect to the mixing coefficients due to the
            `argmax` op.
        maximum: if True, attempt to return the highest-density mode.

    Returns:
        Mode.
    """
    mode_value = self._distributions.mode()
    mixing_log_prob = self.mixing_log_prob
    if maximum:
        mixing_log_prob += self._maybe_mask(
            self._component_log_prob(mode_value))
    mask = tf.one_hot(
        tf.argmax(mixing_log_prob, axis=1), mixing_log_prob.shape[1], axis=1)
    if straight_through_gradient:
        soft_mask = tf.nn.softmax(mixing_log_prob, axis=1)
        mask = tf.stop_gradient(mask - soft_mask) + soft_mask
    return tf.reduce_sum(mask * mode_value, 1)
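# Illustration (not from the original source): with straight_through_gradient=True the
# mask above equals the hard one-hot argmax in the forward pass, while its gradient is
# that of the softmax, since mask = stop_gradient(hard - soft) + soft. Forward-pass
# identity, in plain Python:
_soft = [0.2, 0.5, 0.3]
_hard = [0.0, 1.0, 0.0]
_st = [(h - s) + s for h, s in zip(_hard, _soft)]
assert all(abs(a - b) < 1e-12 for a, b in zip(_st, _hard))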
def _build_train_op(self):
    """Builds a training op.

    Returns:
        train_op: An op performing one step of training from replay data.
    """
    replay_action_one_hot = tf.one_hot(
        self._replay.actions, self.num_actions, 1., 0., name='action_one_hot')
    replay_chosen_q = tf.reduce_sum(
        self._replay_net_outputs.q_values * replay_action_one_hot,
        reduction_indices=1,
        name='replay_chosen_q')

    target = tf.stop_gradient(self._build_target_q_op())
    loss = tf.losses.huber_loss(
        target, replay_chosen_q, reduction=tf.losses.Reduction.NONE)
    if self.summary_writer is not None:
        with tf.variable_scope('Losses'):
            tf.summary.scalar('HuberLoss', tf.reduce_mean(loss))
    return self.optimizer.minimize(tf.reduce_mean(loss))
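# Illustration (not from the original source): the Huber loss used above, assuming the
# default delta of 1.0 (the call does not pass delta). It is quadratic for |e| <= delta
# and linear beyond, which limits the influence of large TD errors. Plain-Python form:
def _huber_reference(e, delta=1.0):
    a = abs(e)
    return 0.5 * e * e if a <= delta else delta * (a - 0.5 * delta)

assert abs(_huber_reference(0.5) - 0.125) < 1e-12
assert abs(_huber_reference(3.0) - 2.5) < 1e-12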
def lagrangian_optimizer_kld(train_set, additive_slack, learning_rate,
                             learning_rate_constraint, loops):
    """Implements surrogate-based Lagrangian optimizer (Algorithm 2).

    Specifically solves:
      min_{theta} sum_{G = 0, 1} KLD(p, pprG(theta))
      s.t. error_rate <= additive_slack,
    where p is the overall proportion of positives and pprG is the positive
    prediction rate for group G.

    We frame this as a constrained optimization problem:
      min_{theta, xi_pos0, xi_pos1, xi_neg0, xi_neg1} {
        -p log(xi_pos0) - (1-p) log(xi_neg0)
        -p log(xi_pos1) - (1-p) log(xi_neg1)}
      s.t.
        error_rate <= additive_slack,
        xi_pos0 <= ppr0(theta), xi_neg0 <= npr0(theta),
        xi_pos1 <= ppr1(theta), xi_neg1 <= npr1(theta),
    and formulate the Lagrangian:
      max_{lambda's >= 0} min_{xi's} {
        -p log(xi_pos0) - (1-p) log(xi_neg0)
        -p log(xi_pos1) - (1-p) log(xi_neg1)
        + lambda_pos0 (xi_pos0 - ppr0(theta))
        + lambda_neg0 (xi_neg0 - npr0(theta))
        + lambda_pos1 (xi_pos1 - ppr1(theta))
        + lambda_neg1 (xi_neg1 - npr1(theta))}
      s.t. error_rate <= additive_slack.

    We do best response for the slack variables xi:
      BR for xi_pos0 = p / lambda_pos0
      BR for xi_neg0 = (1 - p) / lambda_neg0
      BR for xi_pos1 = p / lambda_pos1
      BR for xi_neg1 = (1 - p) / lambda_neg1

    We do gradient ascent on the lambda's, where
      Gradient w.r.t. lambda_pos0
        = BR for xi_pos0 - ppr0(theta)
        = p / lambda_pos0 - ppr0(theta)
        = Gradient w.r.t. lambda_pos0 of
            (p log(lambda_pos0) - lambda_pos0 ppr0(theta))
      Gradient w.r.t. lambda_neg0
        = Gradient w.r.t. lambda_neg0 of
            ((1 - p) log(lambda_neg0) - lambda_neg0 npr0(theta))
      Gradient w.r.t. lambda_pos1
        = Gradient w.r.t. lambda_pos1 of
            (p log(lambda_pos1) - lambda_pos1 ppr1(theta))
      Gradient w.r.t. lambda_neg1
        = Gradient w.r.t. lambda_neg1 of
            ((1 - p) log(lambda_neg1) - lambda_neg1 npr1(theta)).

    We do gradient descent on theta, with the ppr's and npr's replaced with
    hinge surrogates. We use concave lower bounds on the ppr's and npr's, so
    that when they get negated in the updates, we get convex upper bounds. See
    Appendix D.1 in the paper for more details.

    Args:
      train_set: (features, labels, groups)
      additive_slack: float, additive slack on error rate constraint
      learning_rate: float, learning rate for model parameters
      learning_rate_constraint: float, learning rate for Lagrange multipliers
      loops: int, number of iterations

    Returns:
      stochastic_model containing list of models and probabilities,
      deterministic_model.
    """
    x_train, y_train, z_train = train_set
    dimension = x_train.shape[-1]

    tf.reset_default_graph()

    # Data tensors.
    features_tensor = tf.constant(x_train.astype("float32"), name="features")
    labels_tensor = tf.constant(y_train.astype("float32"), name="labels")

    # Linear model.
    weights = tf.Variable(tf.zeros(dimension, dtype=tf.float32),
                          name="weights")
    threshold = tf.Variable(0, name="threshold", dtype=tf.float32)
    predictions_tensor = (
        tf.tensordot(features_tensor, weights, axes=(1, 0)) + threshold)

    # Group-specific predictions.
    predictions_group0 = tf.boolean_mask(predictions_tensor,
                                         mask=(z_train < 1))
    num_examples0 = np.sum(z_train < 1)
    predictions_group1 = tf.boolean_mask(predictions_tensor,
                                         mask=(z_train > 0))
    num_examples1 = np.sum(z_train > 0)

    # We use the TF Constrained Optimization (TFCO) library to set up the
    # constrained optimization problem. The library doesn't currently support
    # best responses for slack variables. So we maintain explicit Lagrange
    # multipliers for the slack variables, and let the library deal with the
    # Lagrange multipliers for the error rate constraint.
    # Since we need to perform a gradient descent update on the model
    # parameters, and an ascent update on the Lagrange multipliers on the
    # slack variables, we create a single "minimization" objective using stop
    # gradients, where a descent gradient update has the effect of minimizing
    # over the model parameters and maximizing over the Lagrange multipliers
    # for the slack variables. As noted above, the ascent update on the
    # Lagrange multipliers for the error rate constraint is done by the
    # library internally.

    # Lagrange multipliers for the four slack variables.
    lambda_pos0 = tf.Variable(0.5, dtype=tf.float32, name="lambda_pos0")
    lambda_neg0 = tf.Variable(0.5, dtype=tf.float32, name="lambda_neg0")
    lambda_pos1 = tf.Variable(0.5, dtype=tf.float32, name="lambda_pos1")
    lambda_neg1 = tf.Variable(0.5, dtype=tf.float32, name="lambda_neg1")

    # Set up prediction rates and surrogate relaxations on them.
    p = np.mean(y_train)  # Proportion of positives.

    # Positive and negative prediction rates for group 0 and group 1.
    ppr_group0 = tf.reduce_sum(
        tf.cast(
            tf.greater(predictions_group0,
                       tf.zeros(num_examples0, dtype="float32")),
            "float32")) / num_examples0
    npr_group0 = 1 - ppr_group0
    ppr_group1 = tf.reduce_sum(
        tf.cast(
            tf.greater(predictions_group1,
                       tf.zeros(num_examples1, dtype="float32")),
            "float32")) / num_examples1
    npr_group1 = 1 - ppr_group1

    # Hinge concave lower bounds on the positive and negative prediction
    # rates. In the gradient updates, these get negated and become convex
    # upper bounds.
    # For group 0:
    ppr_hinge_group0 = tf.reduce_sum(
        1 - tf.nn.relu(1 - predictions_group0)) * 1.0 / num_examples0
    npr_hinge_group0 = tf.reduce_sum(
        1 - tf.nn.relu(1 + predictions_group0)) * 1.0 / num_examples0
    # For group 1:
    ppr_hinge_group1 = tf.reduce_sum(
        1 - tf.nn.relu(1 - predictions_group1)) * 1.0 / num_examples1
    npr_hinge_group1 = tf.reduce_sum(
        1 - tf.nn.relu(1 + predictions_group1)) * 1.0 / num_examples1

    # Set up KL-divergence objective for constrained optimization.
    # We use stop gradients to ensure that a single descent gradient update on
    # the objective has the effect of minimizing over the model parameters and
    # maximizing over the Lagrange multipliers for the slack variables.

    # KL-divergence for group 0.
    kld_hinge_pos_group0 = (
        -tf.stop_gradient(lambda_pos0) * ppr_hinge_group0 -
        p * tf.log(lambda_pos0) + lambda_pos0 * tf.stop_gradient(ppr_group0))
    kld_hinge_neg_group0 = (
        -tf.stop_gradient(lambda_neg0) * npr_hinge_group0 -
        (1 - p) * tf.log(lambda_neg0) +
        lambda_neg0 * tf.stop_gradient(npr_group0))
    kld_hinge_group0 = kld_hinge_pos_group0 + kld_hinge_neg_group0

    # KL-divergence for group 1.
    kld_hinge_pos_group1 = (
        -tf.stop_gradient(lambda_pos1) * ppr_hinge_group1 -
        p * tf.log(lambda_pos1) + lambda_pos1 * tf.stop_gradient(ppr_group1))
    kld_hinge_neg_group1 = (
        -tf.stop_gradient(lambda_neg1) * npr_hinge_group1 -
        (1 - p) * tf.log(lambda_neg1) +
        lambda_neg1 * tf.stop_gradient(npr_group1))
    kld_hinge_group1 = kld_hinge_pos_group1 + kld_hinge_neg_group1

    # Wrap the objective into a rate object.
    objective = tfco.wrap_rate(kld_hinge_group0 + kld_hinge_group1)

    # Set up error rate constraint for constrained optimization.
    context = tfco.rate_context(predictions_tensor, labels_tensor)
    error = tfco.error_rate(context)
    constraints = [error <= additive_slack]

    # Create rate minimization problem object.
    problem = tfco.RateMinimizationProblem(objective, constraints)

    # Set up optimizer.
    optimizer = tfco.LagrangianOptimizerV1(
        tf.train.AdamOptimizer(learning_rate=learning_rate),
        constraint_optimizer=tf.train.AdamOptimizer(
            learning_rate=learning_rate_constraint))
    train_op = optimizer.minimize(problem)

    # Start TF session and initialize variables.
    session = tf.Session()
    session.run(tf.global_variables_initializer())

    # We maintain a list of objectives and model weights during training.
    objectives = []
    violations = []
    models = []

    # Perform full gradient updates.
    for ii in range(loops):
        # Gradient updates.
        session.run(train_op)

        # Checkpoint once in 10 iterations.
        if ii % 10 == 0:
            # Model weights.
            model = [session.run(weights), session.run(threshold)]
            models.append(model)

            # Objective.
            klds = evaluation.expected_group_klds(
                x_train, y_train, z_train, [model], [1.0])
            objectives.append(sum(klds))

            # Violation.
            error = evaluation.expected_error_rate(
                x_train, y_train, [model], [1.0])
            violations.append([error - additive_slack])

    # Use the recorded objectives and constraints to find the best iterate.
    best_iterate = tfco.find_best_candidate_index(
        np.array(objectives), np.array(violations))
    deterministic_model = models[best_iterate]

    # Use shrinking to find a sparse distribution over iterates.
    probabilities = tfco.find_best_candidate_distribution(
        np.array(objectives), np.array(violations))
    models_pruned = [
        models[i] for i in range(len(models)) if probabilities[i] > 0.0
    ]
    probabilities_pruned = probabilities[probabilities > 0.0]

    return (models_pruned, probabilities_pruned), deterministic_model
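# Illustration (not from the original source): a quick numeric check of the ascent
# gradient stated in the docstring of lagrangian_optimizer_kld for the slack-variable
# multipliers, d/d(lambda) [p * log(lambda) - lambda * ppr] = p / lambda - ppr,
# via a central finite difference in plain Python:
import math

_p_ex, _lam, _ppr_ex, _h = 0.4, 0.5, 0.3, 1e-6
_numeric = ((_p_ex * math.log(_lam + _h) - (_lam + _h) * _ppr_ex) -
            (_p_ex * math.log(_lam - _h) - (_lam - _h) * _ppr_ex)) / (2 * _h)
assert abs(_numeric - (_p_ex / _lam - _ppr_ex)) < 1e-6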