def extract_glimpses(self, images, locations):
        """Extracts fovea-like glimpses.

    Args:
      images: 4-D Tensor of shape [batch, height, width, channels].
      locations: 2D Tensor of shape [batch, 2] with glimpse locations. Locations
        are in the interval of [-1, 1] where points:
        (-1, -1): upper left corner.
        (-1, 1): upper right corner.
        (1, 1): lower right corner.
        (1, -1): lower left corner.

    Returns:
      glimpses: list of `num_resolutions` 4-D tensors of shape
        [batch, glimpse height, glimpse width, channels], ordered from the
        full-resolution glimpse (smallest field of view) to the coarsest.
    """
        # Get multi resolution fields of view (first is full resolution)
        image_shape = tf.cast(tf.shape(images)[1:3], dtype=tf.float32)
        start = tf.cast(self.glimpse_shape[0],
                        dtype=tf.float32) / image_shape[0]
        fields_of_view = tf.cast(tf.lin_space(start, 1., self.num_resolutions),
                                 dtype=tf.float32)
        receptive_fields = [self.glimpse_shape] + [
            tf.cast(fields_of_view[i] * image_shape, dtype=tf.int32)
            for i in range(1, self.num_resolutions)
        ]
        images_glimpses_list = []
        for field in receptive_fields:
            # Extract a glimpse with specific shape and scale.
            images_glimpse = utils.extract_glimpse(images,
                                                   size=field,
                                                   offsets=locations)
            # Bigger receptive fields have lower resolution.
            images_glimpse = tf.image.resize_images(images_glimpse,
                                                    size=self.glimpse_shape)
            # Stop gradient
            if self.apply_stop_gradient:
                images_glimpse = tf.stop_gradient(images_glimpse)
            images_glimpses_list.append(images_glimpse)
        return images_glimpses_list
Example #2
def td_learning(v_tm1, r_t, pcont_t, v_t, name="TDLearning"):
    """Implements the TD(0)-learning loss as a TensorFlow op.

  The TD loss is `0.5` times the squared difference between `v_tm1` and
  the target `r_t + pcont_t * v_t`.

  See "Learning to Predict by the Methods of Temporal Differences" by Sutton.
  (https://link.springer.com/article/10.1023/A:1022633531479).

  Args:
    v_tm1: Tensor holding values at previous timestep, shape `[B]`.
    r_t: Tensor holding rewards, shape `[B]`.
    pcont_t: Tensor holding pcontinue values, shape `[B]`.
    v_t: Tensor holding values at current timestep, shape `[B]`.
    name: name to prefix ops created by this function.

  Returns:
    A namedtuple with fields:

    * `loss`: a tensor containing the batch of losses, shape `[B]`.
    * `extra`: a namedtuple with fields:
        * `target`: batch of target values for `v_tm1`, shape `[B]`.
        * `td_error`: batch of temporal difference errors, shape `[B]`.
  """
    # Rank and compatibility checks.
    base_ops.wrap_rank_shape_assert([[v_tm1, v_t, r_t, pcont_t]], [1], name)

    # TD(0)-learning op.
    with tf.name_scope(name, values=[v_tm1, r_t, pcont_t, v_t]):

        # Build target.
        target = tf.stop_gradient(r_t + pcont_t * v_t)

        # Temporal difference error and loss.
        # Loss is MSE scaled by 0.5, so the gradient is equal to the TD error.
        td_error = target - v_tm1
        loss = 0.5 * tf.square(td_error)
        return base_ops.LossOutput(loss, TDExtra(target, td_error))
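A minimal usage sketch of the op above, with made-up batch values (assumes the surrounding `base_ops`/`TDExtra` definitions from the same module are available):

import tensorflow as tf

# Hypothetical batch of B = 3 transitions.
v_tm1 = tf.constant([1.0, 2.0, 3.0])    # value estimates at the previous step
r_t = tf.constant([0.0, 1.0, -1.0])     # rewards
pcont_t = tf.constant([1.0, 1.0, 0.0])  # continuation (discount) probabilities
v_t = tf.constant([1.5, 0.5, 2.0])      # value estimates at the current step

loss, (target, td_error) = td_learning(v_tm1, r_t, pcont_t, v_t)
# target   = r_t + pcont_t * v_t  -> [1.5, 1.5, -1.0]
# td_error = target - v_tm1       -> [0.5, -0.5, -4.0]
# loss     = 0.5 * td_error ** 2  -> [0.125, 0.125, 8.0]
with tf.Session() as sess:
    print(sess.run(loss))  # [0.125 0.125 8.0]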
Example #3
    def __init__(self, sess, state_dim, action_dim, learning_rate):
        self._entropy = 5.
        self.quality = 0
        self.s_dim = state_dim
        self.a_dim = action_dim
        self.lr_rate = learning_rate
        self.sess = sess
        self.R = tf.placeholder(tf.float32, [None, 1])
        self.inputs = tf.placeholder(tf.float32, [None, self.s_dim[0], self.s_dim[1]])
        self.old_pi = tf.placeholder(tf.float32, [None, self.a_dim])
        self.acts = tf.placeholder(tf.float32, [None, self.a_dim])
        self.entropy_weight = tf.placeholder(tf.float32)
        self.pi, self.val = self.CreateNetwork(inputs=self.inputs)
        self.real_out = tf.clip_by_value(self.pi, ACTION_EPS, 1. - ACTION_EPS)
        self.log_prob = tf.log(tf.reduce_sum(tf.multiply(self.real_out, self.acts), reduction_indices=1, keepdims=True))
        self.entropy = tf.multiply(self.real_out, tf.log(self.real_out))
        self.adv = tf.stop_gradient(self.R - self.val)
        self.a2closs = self.log_prob * self.adv
        # Get all network parameters
        self.network_params = \
            tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='actor')

        # Set all network parameters
        self.input_network_params = []
        for param in self.network_params:
            self.input_network_params.append(
                tf.placeholder(tf.float32, shape=param.get_shape()))
        self.set_network_params_op = []
        for idx, param in enumerate(self.input_network_params):
            self.set_network_params_op.append(
                self.network_params[idx].assign(param))
        
        self.loss = - tf.reduce_sum(self.a2closs) \
            + self.entropy_weight * tf.reduce_sum(self.entropy)
        
        self.optimize = tf.train.AdamOptimizer(self.lr_rate).minimize(self.loss)
        self.val_loss = tflearn.mean_square(self.val, self.R)
        self.val_opt = tf.train.AdamOptimizer(self.lr_rate * 10.).minimize(self.val_loss)
Example #4
  def compute_loss(self,
                   done,
                   new_state,
                   memory,
                   gamma=0.99):
    if done:
      reward_sum = 0.  # terminal
    else:
      reward_sum = self.local_model(
          tf.convert_to_tensor(new_state[None, :],
                               dtype=tf.float32))[-1].numpy()[0]

    # Get discounted rewards
    discounted_rewards = []
    for reward in memory.rewards[::-1]:  # reverse buffer r
      reward_sum = reward + gamma * reward_sum
      discounted_rewards.append(reward_sum)
    discounted_rewards.reverse()

    logits, values = self.local_model(
        tf.convert_to_tensor(np.vstack(memory.states),
                             dtype=tf.float32))
    # Get our advantages
    advantage = tf.convert_to_tensor(np.array(discounted_rewards)[:, None],
                            dtype=tf.float32) - values
    # Value loss
    value_loss = advantage ** 2

    # Calculate our policy loss
    policy = tf.nn.softmax(logits)
    entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=policy, logits=logits)

    policy_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=memory.actions,
                                                                 logits=logits)
    policy_loss *= tf.stop_gradient(advantage)
    policy_loss -= 0.01 * entropy
    total_loss = tf.reduce_mean((0.5 * value_loss + policy_loss))
    return total_loss
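A side note on the entropy term above: because the labels fed to `softmax_cross_entropy_with_logits_v2` are the softmax of the same logits, the op returns exactly the policy entropy, H(pi) = -sum_a pi(a) log pi(a). A standalone sanity check with illustrative values (not part of the original model):

import tensorflow as tf

logits = tf.constant([[1.0, 2.0, 0.5]])
policy = tf.nn.softmax(logits)
entropy_via_ce = tf.nn.softmax_cross_entropy_with_logits_v2(labels=policy, logits=logits)
entropy_direct = -tf.reduce_sum(policy * tf.log(policy), axis=-1)

with tf.Session() as sess:
    # Both print the same value (about 0.906 for these logits).
    print(sess.run([entropy_via_ce, entropy_direct]))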
Example #5
    def train(self,
              X,
              lr=0.01,
              batch_size=64,
              epochs=5,
              modelname='vh_weights'):
        with self.graph.as_default():
            print(self.bv.shape)
            tf_v = tf.placeholder(tf.float32, [batch_size, self.bv.shape[0]])
            v = tf.round(tf_v)
            vk = tf.identity(v)

            i = tf.constant(0)
            _, _, vk = tf.while_loop(
                cond=lambda i, k, *args: i <= k,
                body=self.gibbs_step,
                loop_vars=[i, tf.constant(self.cd_steps), vk],
                parallel_iterations=1,
                back_prop=False)

            vk = tf.stop_gradient(vk)
            loss = self.energy(v) - self.energy(vk)
            optimizer = tf.train.AdamOptimizer(lr).minimize(loss)
            init = tf.global_variables_initializer()

        saver = tf.train.Saver([self.W])
        with tf.Session(graph=self.graph) as sess:
            init.run()
            for epoch in range(epochs):
                losses = []
                for i in range(0, len(X) - batch_size, batch_size):
                    x_batch = X[i:i + batch_size]
                    l, _ = sess.run([loss, optimizer],
                                    feed_dict={tf_v: x_batch})
                    losses.append(l)
                print('Epoch %d cost:' % epoch, np.mean(losses))
            self.modelW = self.W.eval()
            saver.save(sess, modelname + str(self.nh))
 def _build_q_loss(self, batch):
     s1 = batch['s1']
     s2 = batch['s2']
     a1 = batch['a1']
     r = batch['r']
     dsc = batch['dsc']
     _, a2_p, log_pi_a2_p = self._p_fn(s2)
     q2_targets = []
     q1_preds = []
     for q_fn, q_fn_target in self._q_fns:
         q2_target_ = q_fn_target(s2, a2_p)
         q1_pred = q_fn(s1, a1)
         q1_preds.append(q1_pred)
         q2_targets.append(q2_target_)
     q2_targets = tf.stack(q2_targets, axis=-1)
     q2_target = self._ensemble_q2_target(q2_targets)
     v2_target = q2_target - self._get_alpha_entropy() * log_pi_a2_p
     if self._value_penalty:
         div_estimate = self._div_estimate(s2)
         v2_target = v2_target - self._get_alpha() * div_estimate
     q1_target = tf.stop_gradient(r + dsc * self._discount * v2_target)
     q_losses = []
     for q1_pred in q1_preds:
         q_loss_ = tf.reduce_mean(tf.square(q1_pred - q1_target))
         q_losses.append(q_loss_)
     q_loss = tf.add_n(q_losses)
     q_w_norm = self._get_q_weight_norm()
     norm_loss = self._weight_decays[0] * q_w_norm
     loss = q_loss + norm_loss
     # info
     info = collections.OrderedDict()
     info['q_loss'] = q_loss
     info['q_norm'] = q_w_norm
     info['r_mean'] = tf.reduce_mean(r)
     info['dsc_mean'] = tf.reduce_mean(dsc)
     info['q2_target_mean'] = tf.reduce_mean(q2_target)
     info['q1_target_mean'] = tf.reduce_mean(q1_target)
     return loss, info
def shake_shake_branch(x, output_filters, stride, rand_forward, rand_backward,
                       hparams):
    """Building a 2 branching convnet."""
    is_training = hparams.mode == tf.estimator.ModeKeys.TRAIN
    x = tf.nn.relu(x)
    x = tf.layers.conv2d(x,
                         output_filters, (3, 3),
                         strides=(stride, stride),
                         padding="SAME",
                         name="conv1")
    x = tf.layers.batch_normalization(x, training=is_training, name="bn1")
    x = tf.nn.relu(x)
    x = tf.layers.conv2d(x,
                         output_filters, (3, 3),
                         padding="SAME",
                         name="conv2")
    x = tf.layers.batch_normalization(x, training=is_training, name="bn2")
    if is_training:
        x = x * rand_backward + tf.stop_gradient(x * rand_forward -
                                                 x * rand_backward)
    else:
        x *= 1.0 / hparams.shake_shake_num_branches
    return x
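The `tf.stop_gradient` expression in the training branch above decouples the forward scaling from the backward scaling: the layer's value is `x * rand_forward`, while gradients flow as if it were `x * rand_backward`. A standalone toy check with made-up scalars (not part of the original model):

import tensorflow as tf

x = tf.constant(3.0)
rand_forward, rand_backward = tf.constant(0.7), tf.constant(0.2)

# Forward value uses rand_forward; the gradient path sees only rand_backward.
y = x * rand_backward + tf.stop_gradient(x * rand_forward - x * rand_backward)
dy_dx = tf.gradients(y, x)[0]

with tf.Session() as sess:
    print(sess.run([y, dy_dx]))  # [2.1, 0.2]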
Example #8
def sqrt_sparsity(motion_map):
    """A regularizer that encourages sparsity.

  This regularizer penalizes nonzero values. Close to zero it behaves like an L1
  regularizer, and far away from zero its strength decreases. The scale that
  distinguishes "close" from "far" is the mean value of the absolute of
  `motion_map`.

  Args:
     motion_map: A tf.Tensor of shape [B, H, W, C]

  Returns:
     A scalar tf.Tensor, the regularizer to be added to the training loss.
  """
    with tf.name_scope('drift'):
        tensor_abs = tf.abs(motion_map)
        mean = tf.stop_gradient(
            tf.reduce_mean(tensor_abs, axis=[1, 2], keep_dims=True))
        # We used L0.5 norm here because it's more sparsity encouraging than L1.
        # The coefficients are designed in a way that the norm asymptotes to L1 in
        # the small value limit.
        return tf.reduce_mean(2 * mean * tf.sqrt(tensor_abs /
                                                 (mean + 1e-24) + 1))
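A short sketch of the asymptotics claimed in the comments above, treating the stop-gradient mean $m = \mathrm{mean}(|x|)$ as a constant (this derivation is an addition, not part of the original code):

$$
2m\sqrt{\frac{|x|}{m} + 1} \;\approx\;
\begin{cases}
2m + |x|, & |x| \ll m \quad \text{(L1-like, up to an additive constant)}\\
2\sqrt{m\,|x|}, & |x| \gg m \quad \text{(L0.5-like, so the penalty grows only sublinearly far from zero)}
\end{cases}
$$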
Example #9
    def bit_to_int(self, x_bit, num_bits, base=2):
        """Turn x_bit representing numbers bitwise (lower-endian) to int tensor.

    Args:
        x_bit: Tensor containing numbers in a particular base to be
          converted to int.
        num_bits: Number of bits in the representation.
        base: Base of the representation.

    Returns:
        Integer representation of this number.
    """
        x_l = tf.stop_gradient(tf.to_int32(tf.reshape(x_bit, [-1, num_bits])))
        # pylint: disable=g-complex-comprehension
        x_labels = [
            x_l[:, i] * tf.to_int32(base)**tf.to_int32(i)
            for i in range(num_bits)
        ]
        res = sum(x_labels)
        return tf.to_int32(
            tf.reshape(res,
                       common_layers.shape_list(x_bit)[:-1]))
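A simplified standalone sketch of the same lower-endian decoding with a worked value (illustrative only, written outside the original class):

import tensorflow as tf

x_bit = tf.constant([[1., 0., 1.]])  # lower-endian bits of 5: 1*2^0 + 0*2^1 + 1*2^2
num_bits, base = 3, 2
x_l = tf.to_int32(x_bit)
res = sum(x_l[:, i] * base ** i for i in range(num_bits))

with tf.Session() as sess:
    print(sess.run(res))  # [5]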
Example #10
def pixels_from_softmax(frame_logits,
                        pure_sampling=False,
                        temperature=1.0,
                        gumbel_noise_factor=0.2):
    """Given frame_logits from a per-pixel softmax, generate colors."""
    # If we're purely sampling, just sample each pixel.
    if pure_sampling or temperature == 0.0:
        return common_layers.sample_with_temperature(frame_logits, temperature)

    # Gumbel-sample from the pixel softmax and average over the pixel values.
    pixel_range = tf.to_float(tf.range(256))
    for _ in range(len(frame_logits.get_shape().as_list()) - 1):
        pixel_range = tf.expand_dims(pixel_range, axis=0)

    frame_logits = tf.nn.log_softmax(frame_logits)
    gumbel_samples = discretization.gumbel_sample(
        common_layers.shape_list(frame_logits)) * gumbel_noise_factor

    frame = tf.nn.softmax((frame_logits + gumbel_samples) / temperature,
                          axis=-1)
    result = tf.reduce_sum(frame * pixel_range, axis=-1)
    # Round on the forward pass, not on the backward one.
    return result + tf.stop_gradient(tf.round(result) - result)
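The last line above is the straight-through rounding trick: the forward pass returns `tf.round(result)` while the backward pass treats the op as the identity. A standalone toy check (not part of the original function):

import tensorflow as tf

x = tf.constant([0.2, 1.7, 2.6])
y = x + tf.stop_gradient(tf.round(x) - x)
dy_dx = tf.gradients(tf.reduce_sum(y), x)[0]

with tf.Session() as sess:
    print(sess.run(y))      # [0. 2. 3.]  (rounded values)
    print(sess.run(dy_dx))  # [1. 1. 1.]  (identity gradient)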
Example #11
def make_graph():
	uniform = tfp.distributions.Uniform()
	input_tensor = uniform.sample((batch_size_tensor, target_fun.n_dims))
	target_tensor = target_fun.eval_tf(input_tensor)

	current_tensor = encoding(input_tensor, False, "encoding")

	for i in range(config["network"]["n_hidden_layers"]):
		current_tensor = linear_layer(current_tensor, config["network"]["n_neurons"], tf.float16, f"fc{i}", False)
		current_tensor = activation(current_tensor, config["network"]["activation"])

	output_tensor = linear_layer(current_tensor, target_fun.n_channels, tf.float16, f"fc_out", False)
	output_tensor = activation(output_tensor, config["network"]["output_activation"])

	relative_l2_error = (target_tensor - output_tensor)**2 / (tf.stop_gradient(output_tensor)**2 + 0.01)
	loss = tf.math.reduce_mean(relative_l2_error)

	LOSS_SCALE = 128
	variables = tf.trainable_variables()
	gradients, _ = compute_gradients(loss, variables, LOSS_SCALE)
	train_op, _ = get_train_op(config, variables, gradients, optimizer)

	return train_op, loss, input_tensor, output_tensor
def cw_attack(sess, x, logits, n_ae, final=False):
    cw_attack_obj = CarliniWagnerL2(logits, sess=sess, back='tf')
    if final:
        cw_params = {
            'binary_search_steps': 9,
            'max_iterations': 2000,
            'learning_rate': 0.01,
            'initial_const': 1.0,
            'abort_early': True,
            'batch_size': n_ae
        }
    else:
        cw_params = {
            'binary_search_steps': 5,
            'max_iterations': 500,
            'learning_rate': 0.01,
            'initial_const': 1.0,
            'batch_size':
            n_ae  # need to specify, since CarliniWagnerL2 is not completely symbolic
        }
    adv_ex_tensor = cw_attack_obj.generate(x, **cw_params)
    adv_ex_tensor = tf.stop_gradient(adv_ex_tensor)
    return adv_ex_tensor
Example #13
 def _get_fake_data(self, inputs, mlm_logits):
     """Sample from the generator to create corrupted input."""
     inputs = pretrain_helpers.unmask(inputs)
     disallow = tf.one_hot(
         inputs.masked_lm_ids,
         depth=self._bert_config.vocab_size,
         dtype=tf.float32) if self._config.disallow_correct else None
     sampled_tokens = tf.stop_gradient(
         pretrain_helpers.sample_from_softmax(mlm_logits /
                                              self._config.temperature,
                                              disallow=disallow))
     sampled_tokids = tf.argmax(sampled_tokens, -1, output_type=tf.int32)
     updated_input_ids, masked = pretrain_helpers.scatter_update(
         inputs.input_ids, sampled_tokids, inputs.masked_lm_positions)
     labels = masked * (1 - tf.cast(
         tf.equal(updated_input_ids, inputs.input_ids), tf.int32))
     updated_inputs = pretrain_data.get_updated_inputs(
         inputs, input_ids=updated_input_ids)
     FakedData = collections.namedtuple(
         "FakedData", ["inputs", "is_fake_tokens", "sampled_tokens"])
     return FakedData(inputs=updated_inputs,
                      is_fake_tokens=labels,
                      sampled_tokens=sampled_tokens)
Example #14
    def build_trainer(self, child_model):
        """Build the train ops by connecting Controller with a Child."""
        # actor
        self.valid_loss = tf.to_float(child_model.rl_loss)
        self.valid_loss = tf.stop_gradient(self.valid_loss)
        self.valid_ppl = tf.exp(self.valid_loss)
        self.reward = REWARD_CONSTANT / self.valid_ppl

        if self.params.controller_entropy_weight:
            self.reward += self.params.controller_entropy_weight * self.sample_entropy

        # baseline
        self.sample_log_probs = tf.reduce_sum(self.sample_log_probs)
        self.baseline = tf.Variable(0.0, dtype=tf.float32, trainable=False)
        baseline_update = tf.assign_sub(
            self.baseline, ((1 - self.params.controller_baseline_dec) *
                            (self.baseline - self.reward)))

        with tf.control_dependencies([baseline_update]):
            self.reward = tf.identity(self.reward)
        self.loss = self.sample_log_probs * (self.reward - self.baseline)

        self.train_step = tf.Variable(0,
                                      dtype=tf.int32,
                                      trainable=False,
                                      name='train_step')
        tf_vars = [
            var for var in tf.trainable_variables()
            if var.name.startswith(self.name)
        ]

        self.train_op, self.optimizer, self.grad_norm = _build_train_op(
            loss=self.loss,
            tf_vars=tf_vars,
            learning_rate=self.params.controller_learning_rate,
            train_step=self.train_step,
            num_aggregate=self.params.controller_num_aggregate)
Example #15
def mine(x, z, name_net="estimator_network"):
    """Computes I(X, Z).

  Uses the algorithm in "Mutual Information Neural Estimation"
  (https://arxiv.org/pdf/1801.04062.pdf).

  Args:
    x: Samples from x [batch_size, size_x].
    z: Samples from z [batch_size, size_z].
    name_net: Scope for the variables forming the network.

  Returns:
    Estimate of the mutual information and the update op for the optimizer.
  """
    z_shuffled = vae.shuffle_codes(z)

    concat_x_x = tf.concat([x, x], axis=0)
    concat_z_z_shuffled = tf.stop_gradient(tf.concat([z, z_shuffled], axis=0))

    with tf.variable_scope(name_net, reuse=tf.AUTO_REUSE):
        d1_x = tf.layers.dense(concat_x_x, 20, name="d1_x")
        d1_z = tf.layers.dense(concat_z_z_shuffled, 20, name="d1_z")
        d1 = tf.nn.elu(d1_x + d1_z, name="d1")
        d2 = tf.layers.dense(d1, 1, name="d2")

    batch_size = tf.shape(x)[0]
    pred_x_z = d2[:batch_size]
    pred_x_z_shuffled = d2[batch_size:]
    loss = -(tf.reduce_mean(pred_x_z, axis=0) + tf.math.log(
        tf.to_float(batch_size)) - tf.math.reduce_logsumexp(pred_x_z_shuffled))
    all_variables = tf.trainable_variables()
    mine_vars = [
        var for var in all_variables if "estimator_network" in var.name
    ]
    mine_op = tf.train.AdamOptimizer(learning_rate=0.01).minimize(
        loss=loss, var_list=mine_vars)
    return -loss, mine_op
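For reference, the value `-loss` returned above is the Donsker-Varadhan estimate used by MINE; the `tf.math.log(tf.to_float(batch_size))` term turns the logsumexp over the shuffled pairs into the log of a mean (a sketch of the correspondence, not part of the original code):

$$
\hat I(X;Z) \;=\; \frac{1}{B}\sum_{i=1}^{B} T_\theta(x_i, z_i)
\;-\; \log\!\Big(\frac{1}{B}\sum_{j=1}^{B} e^{T_\theta(x_j, \tilde z_j)}\Big),
$$

where $B$ is the batch size, $T_\theta$ is the statistics network (`d2`), and $\tilde z$ are the shuffled codes.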
Example #16
def build_new_parameters(
    loss: tf.Tensor,
    parameters: Dict[str, tf.Tensor],
    optimizer: tf.train.Optimizer,
    first_order: bool = False,
):
    """Builds new parameters via an optimization step on the provided loss.

    Parameters
    ----------
    loss : <float32> [] tensor
        A scalar tensor that represents the loss.

    parameters : dict of variables or tensors
        A dictionary of initial parameters.

    optimizer : Optimizer
        An optimizer used for computing parameter updates.

    first_order : bool, optional (default: False)
        If True, gradients of the parameters computed by the optimizer are
        added to the graph as constants. This zeros out the second-order
        terms under subsequent differentiation.

    Returns
    -------
    new_parameters : dict of tensors
        A dictionary of updated parameters.
    """
    param_names, param_values = zip(*parameters.items())
    grads_and_vars = optimizer.compute_gradients(loss, param_values)
    # Prevent backprop through the gradients, if necessary.
    if first_order:
        grads_and_vars = [(tf.stop_gradient(g), v) for g, v in grads_and_vars]
    new_parameters = dict(
        zip(param_names, optimizer.compute_updates(grads_and_vars)))
    return new_parameters
Example #17
    def inner(*args, **kwargs):
        result = tf.stop_gradient(f(*args, **kwargs))
        scope = tf.get_default_graph().get_name_scope()

        def grad(dresult, variables=None):
            with tf.GradientTape() as t:
                t.watch(args)
                if variables is not None:
                    t.watch(variables)
                # we need to outsmart XLA here to force a control dependency
                zero_with_control_dependency = tf.reduce_mean(dresult[0] *
                                                              1e-30)
                new_args = []
                for a in args:
                    if a.dtype.is_floating:
                        new_args.append(
                            a + tf.cast(zero_with_control_dependency, a.dtype))
                    else:
                        new_args.append(a)

                with tf.control_dependencies([dresult]):
                    if bf16:
                        with tf.tpu.bfloat16_scope():
                            with tf.variable_scope(scope, reuse=True):
                                result = f(*new_args, **kwargs)
                    else:
                        with tf.variable_scope(scope, reuse=True):
                            result = f(*new_args, **kwargs)
            kw_vars = []
            if variables is not None:
                kw_vars = list(variables)
            grads = t.gradient(result,
                               list(new_args) + kw_vars,
                               output_gradients=[dresult])
            return grads[:len(new_args)], grads[len(new_args):]

        return result, grad
def _coordinates_to_heatmap_sparse(y_grid, x_grid, y_coordinates, x_coordinates,
                                   sigma, channel_onehot, channel_weights=None):
  """Sparse version of coordinates to heatmap using tf.scatter."""

  if not hasattr(tf, 'tensor_scatter_nd_max'):
    raise RuntimeError(
        ('Please upgrade tensorflow to use `tensor_scatter_nd_max` or set '
         'compute_heatmap_sparse=False'))
  _, num_channels = (
      shape_utils.combined_static_and_dynamic_shape(channel_onehot))

  height, width = shape_utils.combined_static_and_dynamic_shape(y_grid)
  x_grid = tf.expand_dims(x_grid, 2)
  y_grid = tf.expand_dims(y_grid, 2)
  # The raw center coordinates in the output space.
  x_diff = x_grid - tf.math.floor(x_coordinates)
  y_diff = y_grid - tf.math.floor(y_coordinates)
  squared_distance = x_diff**2 + y_diff**2

  gaussian_map = tf.exp(-squared_distance / (2 * sigma * sigma))

  if channel_weights is not None:
    gaussian_map = gaussian_map * channel_weights[tf.newaxis, tf.newaxis, :]

  channel_indices = tf.argmax(channel_onehot, axis=1)

  channel_indices = channel_indices[:, tf.newaxis]
  heatmap_init = tf.zeros((num_channels, height, width))

  gaussian_map = tf.transpose(gaussian_map, (2, 0, 1))
  heatmap = tf.tensor_scatter_nd_max(
      heatmap_init, channel_indices, gaussian_map)

  # Maximum of an empty tensor is -inf; the following is to avoid that.
  heatmap = tf.maximum(heatmap, 0)

  return tf.stop_gradient(tf.transpose(heatmap, (1, 2, 0)))
Example #19
        def process_single_frame(prev_outputs, inputs):
            """Process a single frame of the video."""
            cur_image, input_reward, action = inputs
            time_step, prev_image, prev_reward, frame_buf, lstm_states = prev_outputs

            # Sample from the softmax (by argmax); this is a no-op for non-softmax losses.
            prev_image = self.get_sampled_frame(prev_image)

            generated_items = [prev_image]
            groundtruth_items = [cur_image]
            done_warm_start = tf.greater(time_step, context_frames - 1)
            input_image, = self.get_scheduled_sample_inputs(
                done_warm_start, groundtruth_items, generated_items, ss_func)

            # Prediction
            pred_image, lstm_states, _ = self.construct_predictive_tower(
                input_image, None, action, lstm_states, latent)

            if self.hparams.reward_prediction:
                reward_input_image = self.get_sampled_frame(pred_image)
                if self.hparams.reward_prediction_stop_gradient:
                    reward_input_image = tf.stop_gradient(reward_input_image)
                with tf.control_dependencies([time_step]):
                    frame_buf = [reward_input_image] + frame_buf[:-1]
                pred_reward = self.reward_prediction(frame_buf, None, action,
                                                     latent)
                pred_reward = common_video.decode_to_shape(
                    pred_reward, common_layers.shape_list(input_reward),
                    "reward_dec")
            else:
                pred_reward = prev_reward

            time_step += 1
            outputs = (time_step, pred_image, pred_reward, frame_buf,
                       lstm_states)

            return outputs
Example #20
def get_disc_loss(args, x, x_fake, score_func, z_outer, neg_kl_outer):
    opt_disc = tf.train.AdamOptimizer(learning_rate=args.learning_rate,
                                      beta1=args.beta1,
                                      beta2=args.beta2)

    fx = score_func(x, z_outer)
    f_fake_x = score_func(x_fake, z_outer)
    f_loss = tf.reduce_mean(-fx) + tf.reduce_mean(f_fake_x)

    loss = f_loss + tf.reduce_mean(-neg_kl_outer)
    if args.gp_lambda > 0:  # add gradient penalty
        alpha = tf.random.uniform(shape=(tf.shape(x)[0], 1, 1))
        x_hat = alpha * x + (1 - alpha) * x_fake
        d_hat = score_func(x_hat, tf.stop_gradient(z_outer))
        ddx = tf.gradients(d_hat, x_hat)[0]
        ddx = tf.sqrt(tf.reduce_sum(tf.square(ddx), axis=[1, 2]))
        ddx = tf.reduce_mean(tf.square(ddx - 1.0)) * args.gp_lambda
        loss = loss + ddx
    gvs = opt_disc.compute_gradients(
        loss, var_list=tf.trainable_variables(scope='score_func'))
    gvs = [(tf.where(tf.is_nan(grad), tf.zeros_like(grad), grad), val)
           for grad, val in gvs if grad is not None]
    train_disc = opt_disc.apply_gradients(gvs)
    return f_loss, train_disc
Example #21
 def __call__(self, inputs, state, scope=None):
   """Run the cell with the declared dropouts."""
   orig_inputs = inputs
   if self._input_keep_prob < 1:
     # When using functional_rnn, the forward pass will be recomputed during
     # backprop, so the dropout layer must be deterministic between the
     # forward and backward passes. We therefore use stateless random ops so
     # that the generated random numbers are deterministic for a given seed.
     # We also want the dropout to differ across global steps and time steps,
     # so both are folded into the seeds.
     seeds = tf.stop_gradient(
         tf.stack([
             tf.cast(self._global_step, tf.int32) + tf.reduce_sum(state[1]),
             int(hashlib.md5(
                 inputs.name.encode("utf-8")).hexdigest()[:8], 16) & 0x7FFFFFFF
         ]))
     keep_prob = tf.convert_to_tensor(
         self._input_keep_prob, dtype=tf.float32, name="keep_prob")
     random_tensor = keep_prob + contrib_stateless.stateless_random_uniform(
         inputs.get_shape(), seed=tf.cast(seeds, tf.int32), dtype=tf.float32)
     binary_tensor = tf.cast(tf.floor(random_tensor), inputs.dtype)
     inputs = tf.div(inputs, tf.cast(keep_prob, inputs.dtype)) * binary_tensor
   with tf.variable_scope("cell_fn", reuse=tf.AUTO_REUSE):
     state0 = state[0] if self._enable else state
     output, new_state = self._cell(inputs, state0, scope=scope)
   if self._seq_len is not None and self._enable:
     seq_len = tf.reshape(self._seq_len, [-1])
     padding = tf.reshape(state[1], [-1]) < (tf.reduce_max(seq_len) - seq_len)
     output = tf.where(padding, orig_inputs, output)
     new_state = contrib_rnn.LSTMStateTuple(
         tf.where(padding, state[0].c, new_state.c),
         tf.where(padding, state[0].h, new_state.h))
   if self._enable:
     return output, [new_state, state[1] + 1]
   else:
     return output, new_state
Example #22
    def _build(self, inputs):
        (shared_inputs, extra_policy_inputs) = inputs
        policy_in = tf.concat([shared_inputs, extra_policy_inputs], axis=1)

        policy = snt.nets.MLP(output_sizes=self._policy_layers,
                              activation=self._activation,
                              name='policy_mlp')(policy_in)

        # Sample an action from the policy logits.
        action = tf.multinomial(policy, num_samples=1, output_dtype=tf.int32)
        action = tf.squeeze(action, 1)  # [B, 1] -> [B]

        if self._policy_clip_abs_value > 0:
            policy = snt.clip_gradient(
                net=policy,
                clip_value_min=-self._policy_clip_abs_value,
                clip_value_max=self._policy_clip_abs_value)

        baseline_in = tf.concat(
            [shared_inputs, tf.stop_gradient(policy)], axis=1)
        baseline = snt.nets.MLP(self._baseline_layers,
                                activation=self._activation,
                                name='baseline_mlp')(baseline_in)
        baseline = tf.squeeze(baseline, axis=-1)  # [B, 1] -> [B]

        if self._policy_clip_abs_value > 0:
            baseline = snt.clip_gradient(
                net=baseline,
                clip_value_min=-self._policy_clip_abs_value,
                clip_value_max=self._policy_clip_abs_value)

        outputs = PolicyOutputs(policy=policy,
                                action=action,
                                baseline=baseline)

        return outputs
Example #23
    def _crop_pool_layer(self, bottom, rois, name):
        with tf.variable_scope(name):
            batch_ids = tf.squeeze(
                tf.slice(rois, [0, 0], [-1, 1], name="batch_id"), [1])
            # Get the normalized coordinates of bboxes
            bottom_shape = tf.shape(bottom)
            height = (tf.to_float(bottom_shape[1]) - 1.) * np.float32(
                self._feat_stride[0])
            width = (tf.to_float(bottom_shape[2]) - 1.) * np.float32(
                self._feat_stride[0])
            x1 = tf.slice(rois, [0, 1], [-1, 1], name="x1") / width
            y1 = tf.slice(rois, [0, 2], [-1, 1], name="y1") / height
            x2 = tf.slice(rois, [0, 3], [-1, 1], name="x2") / width
            y2 = tf.slice(rois, [0, 4], [-1, 1], name="y2") / height
            # Won't be backpropagated to rois anyway, but to save time
            bboxes = tf.stop_gradient(tf.concat([y1, x1, y2, x2], axis=1))
            pre_pool_size = cfg.FLAGS.roi_pooling_size * 2
            crops = tf.image.crop_and_resize(bottom,
                                             bboxes,
                                             tf.to_int32(batch_ids),
                                             [pre_pool_size, pre_pool_size],
                                             name="crops")

        return slim.max_pool2d(crops, [2, 2], padding='SAME')
Example #24
    def conditional_linear_classifier(self,
                                      inputs,
                                      n_classes,
                                      probs,
                                      add_bias=True):
        """"""

        input_shape = tf.shape(inputs)
        batch_size = input_shape[0]
        bucket_size = input_shape[1]
        input_size = inputs.get_shape().as_list()[-1]

        if len(probs.get_shape().as_list()) == 2:
            probs = tf.to_float(
                tf.one_hot(tf.to_int64(probs), bucket_size, 1, 0))
        else:
            probs = tf.stop_gradient(probs)

        if self.moving_params is None:
            keep_prob = self.mlp_keep_prob
        else:
            keep_prob = 1
        if isinstance(keep_prob, tf.Tensor) or keep_prob < 1:
            noise_shape = tf.stack([batch_size, 1, 1, input_size])
            inputs = tf.nn.dropout(inputs, keep_prob, noise_shape=noise_shape)

        lin = linalg.linear(inputs,
                            n_classes,
                            add_bias=add_bias,
                            initializer=tf.zeros_initializer,
                            moving_params=self.moving_params)
        weighted_lin = tf.batch_matmul(lin,
                                       tf.expand_dims(probs, 3),
                                       adj_x=True)

        return weighted_lin, lin
Example #25
    def __init__(self, auxiliary_task, predict_from_pixels, feat_dim=None, scope='dynamics'):
        self.scope = scope
        self.auxiliary_task = auxiliary_task
        self.hidsize = self.auxiliary_task.hidsize
        self.feat_dim = feat_dim
        self.obs = self.auxiliary_task.obs
        self.last_ob = self.auxiliary_task.last_ob
        self.ac = self.auxiliary_task.ac
        self.ac_space = self.auxiliary_task.ac_space
        self.ob_mean = self.auxiliary_task.ob_mean
        self.ob_std = self.auxiliary_task.ob_std
        #############################################
        # This is the modified part

        #############################################
        if predict_from_pixels:
            self.features = self.get_features(self.obs, reuse=False)
        else:
            self.features = tf.stop_gradient(self.auxiliary_task.features)

        self.out_features = self.auxiliary_task.next_features

        with tf.variable_scope(self.scope + "_loss"):
            self.loss = self.get_loss()
def pgd_attack(clean_inputs, clean_labels, logits, p_norm, eps, pgd_n_iter):
    """ Symbolic definition of the PGD attack """

    attack = MadryEtAl(logits)
    attack_params = {
        'nb_iter': pgd_n_iter,
        'clip_min': 0.0,
        'clip_max': 1.0,
        'y': clean_labels,
        'ord': p_norm,
        'eps': eps
    }
    if p_norm == np.inf:
        attack_params['eps_iter'] = attack_params['eps'] / pgd_n_iter * 2
        attack_params['pgd_update'] = 'sign'
    elif p_norm == 2:
        attack_params['eps_iter'] = attack_params['eps'] / pgd_n_iter * 2
        attack_params['pgd_update'] = 'plain'
    else:
        raise Exception('Wrong p_norm.')

    adv_ex_tensor = attack.generate(clean_inputs, **attack_params)
    adv_ex_tensor = tf.stop_gradient(adv_ex_tensor)
    return adv_ex_tensor
Example #27
def reinforce_loss_discrete(classification_logits_t,
                            classification_labels_t,
                            locations_logits_t,
                            locations_labels_t,
                            use_punishment=False):
    """Computes REINFORCE loss for contentious discrete action spaces.

  Args:
    classification_logits_t: List of classification logits at each time point.
    classification_labels_t: List of classification labels at each time point.
    locations_logits_t: List of location logits at each time point.
    locations_labels_t: List of location labels at each time point.
    use_punishment: (Boolean) Reward {-1, 1} if true else {0, 1}.

  Returns:
    reinforce_loss: REINFORCE loss.
  """
    classification_logits = tf.concat(classification_logits_t, axis=0)
    classification_labels = tf.concat(classification_labels_t, axis=0)
    locations_logits = tf.concat(locations_logits_t, axis=0)
    locations_labels = tf.concat(locations_labels_t, axis=0)
    rewards = tf.cast(tf.equal(
        tf.argmax(classification_logits,
                  axis=1,
                  output_type=classification_labels.dtype),
        classification_labels),
                      dtype=tf.float32)  # size (batch_size) each
    if use_punishment:
        # Rewards are in {-1, 1} instead of {0, 1}.
        rewards = 2. * rewards - 1.
    neg_advs = tf.stop_gradient(rewards - tf.reduce_mean(rewards))
    log_prob = -tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=locations_logits, labels=locations_labels)

    loss = -tf.reduce_mean(neg_advs * log_prob)
    return loss
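For reference, the surrogate above is the standard REINFORCE estimator with a mean-reward baseline; the advantage is wrapped in `tf.stop_gradient` so only the location log-probabilities receive gradients (a sketch of the correspondence, not part of the original code):

$$
\mathcal{L} \;=\; -\,\frac{1}{N}\sum_{i}\underbrace{(R_i - \bar R)}_{\text{stop\_gradient}}\,\log \pi_\theta(\ell_i)
\quad\Rightarrow\quad
\nabla_\theta \mathcal{L} \;=\; -\,\frac{1}{N}\sum_{i}(R_i - \bar R)\,\nabla_\theta \log \pi_\theta(\ell_i),
$$

where $R_i$ is the classification reward ({0, 1}, or {-1, 1} with `use_punishment`) and $\ell_i$ is the location label.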
  def mode(self, straight_through_gradient=False, maximum=False):
    """Mode of the distribution.
    Args:
      straight_through_gradient: Boolean; if True, it uses the straight-through
        gradient estimator for the mode. Otherwise there is no gradient
        with respect to the mixing coefficients due to the `argmax` op.
      maximum: if True, attempt to return the highest-density mode.
    Returns:
      Mode.
    """
    mode_value = self._distributions.mode()
    mixing_log_prob = self.mixing_log_prob

    if maximum:
      mixing_log_prob += self._maybe_mask(self._component_log_prob(mode_value))

    mask = tf.one_hot(tf.argmax(mixing_log_prob, axis=1),
                      mixing_log_prob.shape[1], axis=1)

    if straight_through_gradient:
      soft_mask = tf.nn.softmax(mixing_log_prob, axis=1)
      mask = tf.stop_gradient(mask - soft_mask) + soft_mask

    return tf.reduce_sum(mask * mode_value, 1)
Example #29
    def _build_train_op(self):
        """Builds a training op.

    Returns:
      train_op: An op performing one step of training from replay data.
    """
        replay_action_one_hot = tf.one_hot(self._replay.actions,
                                           self.num_actions,
                                           1.,
                                           0.,
                                           name='action_one_hot')
        replay_chosen_q = tf.reduce_sum(self._replay_net_outputs.q_values *
                                        replay_action_one_hot,
                                        reduction_indices=1,
                                        name='replay_chosen_q')

        target = tf.stop_gradient(self._build_target_q_op())
        loss = tf.losses.huber_loss(target,
                                    replay_chosen_q,
                                    reduction=tf.losses.Reduction.NONE)
        if self.summary_writer is not None:
            with tf.variable_scope('Losses'):
                tf.summary.scalar('HuberLoss', tf.reduce_mean(loss))
        return self.optimizer.minimize(tf.reduce_mean(loss))
Example #30
def lagrangian_optimizer_kld(train_set, additive_slack, learning_rate,
                             learning_rate_constraint, loops):
    """Implements surrogate-based Lagrangian optimizer (Algorithm 2).

  Specifically solves:
    min_{theta} sum_{G = 0, 1} KLD(p, pprG(theta))
      s.t. error_rate <= additive_slack,
    where p is the overall proportion of positives and pprG is the positive
    prediction rate for group G.

  We frame this as a constrained optimization problem:
    min_{theta, xi_pos0, xi_pos1, xi_neg0, xi_neg1} {
      -p log(xi_pos0) - (1-p) log(xi_neg0) - p log(xi_pos1)
        -(1-p) log(xi_neg1)}
    s.t.
      error_rate <= additive_slack,
        xi_pos0 <= ppr0(theta), xi_neg0 <= npr0(theta),
        xi_pos1 <= ppr1(theta), xi_neg1 <= npr1(theta),
  and formulate the Lagrangian:
    max_{lambda's >= 0} min_{xi's} {
      -p log(xi_pos0) - (1-p) log(xi_neg0) - p log(xi_pos1)
        -(1-p) log(xi_neg1)
       + lambda_pos0 (xi_pos0 - ppr0(theta))
       + lambda_neg0 (xi_neg0 - npr0(theta))
       + lambda_pos1 (xi_pos1 - ppr1(theta))
       + lambda_neg1 (xi_neg1 - npr1(theta))}
    s.t.
      error_rate <= additive_slack.

  We do best response for the slack variables xi:
    BR for xi_pos0 = p / lambda_pos0
    BR for xi_neg0 = (1 - p) / lambda_neg0
    BR for xi_pos1 = p / lambda_pos1
    BR for xi_neg1 = (1 - p) / lambda_neg1
  We do gradient ascent on the lambda's, where
    Gradient w.r.t. lambda_pos0
      = BR for xi_pos0 - ppr0(theta)
      = p / lambda_pos0 - ppr0(theta)
      = Gradient w.r.t. lambda_pos0 of
        (p log(lambda_pos0) - lambda_pos0 ppr0(theta))
    Gradient w.r.t. lambda_neg0
      = Gradient w.r.t. lambda_neg0 of
        ((1 - p) log(lambda_neg0) - lambda_neg0 npr0(theta))
    Gradient w.r.t. lambda_pos1
      = Gradient w.r.t. lambda_pos1 of
        (p log(lambda_pos1) - lambda_pos1 ppr1(theta))
    Gradient w.r.t. lambda_neg1
      = Gradient w.r.t. lambda_neg1 of
        ((1 - p) log(lambda_neg1) - lambda_neg1 npr1(theta)).
  We do gradient descent on thetas's, with ppr's and npr's replaced with hinge
  surrogates. We use concave lower bounds on ppr's and npr's, so that when they
  get negated in the updates, we get convex upper bounds.

  See Appendix D.1 in the paper for more details.

  Args:
    train_set: (features, labels, groups)
    additive_slack: float, additive slack on error rate constraint
    learning_rate: float, learning rate for model parameters
    learning_rate_constraint: float, learning rate for Lagrange multipliers
    loops: int, number of iterations

  Returns:
    stochastic_model containing list of models and probabilities,
    deterministic_model.
  """
    x_train, y_train, z_train = train_set
    dimension = x_train.shape[-1]

    tf.reset_default_graph()

    # Data tensors.
    features_tensor = tf.constant(x_train.astype("float32"), name="features")
    labels_tensor = tf.constant(y_train.astype("float32"), name="labels")

    # Linear model.
    weights = tf.Variable(tf.zeros(dimension, dtype=tf.float32),
                          name="weights")
    threshold = tf.Variable(0, name="threshold", dtype=tf.float32)
    predictions_tensor = (tf.tensordot(features_tensor, weights, axes=(1, 0)) +
                          threshold)

    # Group-specific predictions.
    predictions_group0 = tf.boolean_mask(predictions_tensor,
                                         mask=(z_train < 1))
    num_examples0 = np.sum(z_train < 1)
    predictions_group1 = tf.boolean_mask(predictions_tensor,
                                         mask=(z_train > 0))
    num_examples1 = np.sum(z_train > 0)

    # We use the TF Constrained Optimization (TFCO) library to set up the
    # constrained optimization problem. The library doesn't currently support best
    # responses for slack variables. So we maintain explicit Lagrange multipliers
    # for the slack variables, and let the library deal with the Lagrange
    # multipliers for the error rate constraint.

    # Since we need to perform a gradient descent update on the model parameters,
    # and an ascent update on the Lagrange multipliers on the slack variables, we
    # create a single "minimization" objective using stop gradients, where a
    # descent gradient update has the effect of minimizing over the model
    # parameters and maximizing over the Lagrange multipliers for the slack
    # variables. As noted above, the ascent update on the Lagrange multipliers for
    # the error rate constraint is done by the library internally.

    # Placeholders for Lagrange multipliers for the four slack variables.
    lambda_pos0 = tf.Variable(0.5, dtype=tf.float32, name="lambda_pos0")
    lambda_neg0 = tf.Variable(0.5, dtype=tf.float32, name="lambda_neg0")
    lambda_pos1 = tf.Variable(0.5, dtype=tf.float32, name="lambda_pos1")
    lambda_neg1 = tf.Variable(0.5, dtype=tf.float32, name="lambda_neg1")

    # Set up prediction rates and surrogate relaxations on them.
    p = np.mean(y_train)  # Proportion of positives.

    # Positive and negative prediction rates for group 0 and group 1.
    ppr_group0 = tf.reduce_sum(
        tf.cast(
            tf.greater(predictions_group0,
                       tf.zeros(num_examples0, dtype="float32")),
            "float32")) / num_examples0
    npr_group0 = 1 - ppr_group0
    ppr_group1 = tf.reduce_sum(
        tf.cast(
            tf.greater(predictions_group1,
                       tf.zeros(num_examples1, dtype="float32")),
            "float32")) / num_examples1
    npr_group1 = 1 - ppr_group1

    # Hinge concave lower bounds on the positive and negative prediction rates.
    # In the gradient updates, these get negated and become convex upper bounds.
    # For group 0:
    ppr_hinge_group0 = tf.reduce_sum(
        1 - tf.nn.relu(1 - predictions_group0)) * 1.0 / num_examples0
    npr_hinge_group0 = tf.reduce_sum(
        1 - tf.nn.relu(1 + predictions_group0)) * 1.0 / num_examples0
    # For group 1:
    ppr_hinge_group1 = tf.reduce_sum(
        1 - tf.nn.relu(1 - predictions_group1)) * 1.0 / num_examples1
    npr_hinge_group1 = tf.reduce_sum(
        1 - tf.nn.relu(1 + predictions_group1)) * 1.0 / num_examples1

    # Set up KL-divergence objective for constrained optimization.
    # We use stop gradients to ensure that a single descent gradient update on the
    # objective has the effect of minimizing over the model parameters and
    # maximizing over the Lagrange multipliers for the slack variables.

    # KL-divergence for group 0.
    kld_hinge_pos_group0 = (-tf.stop_gradient(lambda_pos0) * ppr_hinge_group0 -
                            p * tf.log(lambda_pos0) +
                            lambda_pos0 * tf.stop_gradient(ppr_group0))
    kld_hinge_neg_group0 = (-tf.stop_gradient(lambda_neg0) * npr_hinge_group0 -
                            (1 - p) * tf.log(lambda_neg0) +
                            lambda_neg0 * tf.stop_gradient(npr_group0))
    kld_hinge_group0 = kld_hinge_pos_group0 + kld_hinge_neg_group0

    # KL-divergence for group 1.
    kld_hinge_pos_group1 = (-tf.stop_gradient(lambda_pos1) * ppr_hinge_group1 -
                            p * tf.log(lambda_pos1) +
                            lambda_pos1 * tf.stop_gradient(ppr_group1))
    kld_hinge_neg_group1 = (-tf.stop_gradient(lambda_neg1) * npr_hinge_group1 -
                            (1 - p) * tf.log(lambda_neg1) +
                            lambda_neg1 * tf.stop_gradient(npr_group1))
    kld_hinge_group1 = kld_hinge_pos_group1 + kld_hinge_neg_group1

    # Wrap the objective into a rate object.
    objective = tfco.wrap_rate(kld_hinge_group0 + kld_hinge_group1)

    # Set up error rate constraint for constrained optimization.
    context = tfco.rate_context(predictions_tensor, labels_tensor)
    error = tfco.error_rate(context)
    constraints = [error <= additive_slack]

    # Create the rate minimization problem object.
    problem = tfco.RateMinimizationProblem(objective, constraints)

    # Set up optimizer.
    optimizer = tfco.LagrangianOptimizerV1(
        tf.train.AdamOptimizer(learning_rate=learning_rate),
        constraint_optimizer=tf.train.AdamOptimizer(
            learning_rate=learning_rate_constraint))
    train_op = optimizer.minimize(problem)

    # Start TF session and initialize variables.
    session = tf.Session()
    session.run(tf.global_variables_initializer())

    # We maintain a list of objectives and model weights during training.
    objectives = []
    violations = []
    models = []

    # Perform full gradient updates.
    for ii in range(loops):

        # Gradient updates.
        session.run(train_op)

        # Checkpoint once in 10 iterations.
        if ii % 10 == 0:
            # Model weights.
            model = [session.run(weights), session.run(threshold)]
            models.append(model)

            # Objective.
            klds = evaluation.expected_group_klds(x_train, y_train, z_train,
                                                  [model], [1.0])
            objectives.append(sum(klds))

            # Violation.
            error = evaluation.expected_error_rate(x_train, y_train, [model],
                                                   [1.0])
            violations.append([error - additive_slack])

    # Use the recorded objectives and constraints to find the best iterate.
    best_iterate = tfco.find_best_candidate_index(np.array(objectives),
                                                  np.array(violations))
    deterministic_model = models[best_iterate]

    # Use shrinking to find a sparse distribution over iterates.
    probabilities = tfco.find_best_candidate_distribution(
        np.array(objectives), np.array(violations))
    models_pruned = [
        models[i] for i in range(len(models)) if probabilities[i] > 0.0
    ]
    probabilities_pruned = probabilities[probabilities > 0.0]

    return (models_pruned, probabilities_pruned), deterministic_model