Exemplo n.º 1
0
def artanh(x):
    """Inverse hyperbolic tangent with the input clipped just inside (-1, 1).

    The clipping margin depends on the dtype of ``x``: 1e-7 for float32 and
    1e-16 for float64.

    Args:
        x: Tensor whose ``dtype`` is ``tf.float32`` or ``tf.float64``.

    Returns:
        ``tf.atanh`` applied to the clipped input.

    Raises:
        ValueError: If ``x.dtype`` is neither float32 nor float64.
    """
    if x.dtype == tf.float32:
        dtype, margin = tf.float32, 1e-7
    elif x.dtype == tf.float64:
        dtype, margin = tf.float64, 1e-16
    else:
        raise ValueError('invalid dtype!')

    # One-element bound tensors, pulled inwards by the dtype-specific margin.
    lower = tf.constant([-1], dtype=dtype) + margin
    upper = tf.constant([1], dtype=dtype) - margin
    clipped = tf.clip_by_value(x, clip_value_min=lower, clip_value_max=upper)
    return tf.atanh(clipped)
Exemplo n.º 2
0
    def pair_dy(self):
        """
        Rapidity difference between all pairs of particles.

        The pairwise differences are flattened to ``self.n**2`` columns and
        the columns listed in ``self.triu_indices`` are returned.
        """
        # Rapidity is atanh(beta), so dy = atanh(beta1) - atanh(beta2).
        # beta is kept within [epsilon, 1 - epsilon] before taking atanh.
        beta = tf.clip_by_value(self.beta(), self.epsilon, 1 - self.epsilon)
        y_first = tf.atanh(tf.expand_dims(beta, axis=-1))
        y_second = tf.atanh(tf.expand_dims(beta, axis=-2))
        pairwise = y_first - y_second

        # triu_indices is expected to pick the upper triangle without the
        # diagonal out of the flattened pair matrix.
        flat = tf.reshape(pairwise, [-1, self.n**2])
        return tf.gather(flat, self.triu_indices, axis=1)
Exemplo n.º 3
0
def soft_round_inverse(y, alpha, eps=1e-3):
    """Inverse of soft_round().

    This is described in Sec. 4.1. in the paper
    > "Universally Quantized Neural Compression"<br />
    > Eirikur Agustsson & Lucas Theis<br />
    > https://arxiv.org/abs/2006.09952

    Args:
      y: tf.Tensor. Inputs to this function.
      alpha: Float or tf.Tensor. Controls smoothness of the approximation.
      eps: Float. Threshold below which soft_round() is assumed to equal the
        identity function.

    Returns:
      tf.Tensor
    """
    # Keep alpha away from zero so the branch that tf.where discards (when
    # alpha < eps) cannot inject NaNs into the gradient.
    safe_alpha = tf.maximum(alpha, eps)
    center = tf.floor(y) + .5
    scale = tf.tanh(safe_alpha / 2.) * 2.
    offset = tf.atanh((y - center) * scale) / safe_alpha

    # By definition the offset lies in [-.5, .5]. Clipping keeps the forward
    # pass finite if atanh overflows to +-inf; the backward pass may still
    # return non-finite values in that case.
    offset = tf.clip_by_value(offset, -.5, .5)

    # For very small alpha, soft_round is treated as the identity.
    return tf.where(alpha < eps, y, center + offset, name="soft_round_inverse")
Exemplo n.º 4
0
 def nlogp(self, dist, action):
     '''Mean negative log-probability of a tanh-squashed action under `dist`.

     The action is clipped to [-1 + EPS, 1 - EPS] and mapped through atanh
     before being scored by `dist`; the sum over axis 1 of
     log(1 - action**2 + EPS) is then subtracted.
     '''
     clipped = tf.clip_by_value(action, -1 + EPS, 1 - EPS)
     pre_tanh_action = tf.atanh(clipped)
     pre_tanh_logp = dist.log_prob(pre_tanh_action)
     jacobian_term = tf.reduce_sum(tf.log(1 - action**2 + EPS), axis=1)
     return -tf.reduce_mean(pre_tanh_logp - jacobian_term)
Exemplo n.º 5
0
  def testSampleFromDiscretizedMixLogistic(self):
    """Samples should match the clipped locations of the first component."""
    batch, height, width = 2, 4, 4
    num_mixtures = 5
    seed = 42
    spatial = [batch, height, width]
    param_shape = spatial + [num_mixtures * 3]

    # Assign all probability mass to the first mixture component.
    dominant_logit = tf.ones(spatial + [1]) * 1e8
    other_logits = tf.zeros(spatial + [num_mixtures - 1])
    logits = tf.concat([dominant_logit, other_logits], axis=-1)
    locs = tf.random_uniform(param_shape, minval=-.9, maxval=.9)
    log_scales = tf.ones(param_shape) * -1e8
    coeffs = tf.atanh(tf.zeros(param_shape))
    pred = tf.concat([logits, locs, log_scales, coeffs], axis=-1)

    expected_sample = tf.clip_by_value(locs[..., :3], -1., 1.)
    actual_sample = common_layers.sample_from_discretized_mix_logistic(
        pred, seed=seed)

    actual_sample_val, expected_sample_val = self.evaluate(
        [actual_sample, expected_sample])
    # Use a low tolerance: samples numerically differ, as the actual
    # implementation clips log-scales so they always contribute to sampling.
    self.assertAllClose(actual_sample_val, expected_sample_val, atol=1e-2)
Exemplo n.º 6
0
 def _graph_fn_unsquash(self, values):
     """Undo the squashing: rescale to [-1, 1] and apply atanh.

     `values` is mapped linearly from [self.low, self.high] to [-1, 1] and
     then passed through the backend's atanh.

     Args:
         values: Tensor of squashed values.

     Returns:
         The un-squashed tensor for the "tf" and "pytorch" backends;
         implicitly None for any other backend.
     """
     if get_backend() == "tf":
         return tf.atanh((values - self.low) /
                         (self.high - self.low) * 2.0 - 1.0)
     # The original repeated the "tf" check here, which made the torch
     # branch unreachable.
     elif get_backend() == "pytorch":
         return torch.atanh((values - self.low) /
                            (self.high - self.low) * 2.0 - 1.0)
Exemplo n.º 7
0
 def neglogp(self, x):
     """Negative log-probability of a squashed value `x`.

     `x` is mapped through atanh, scored under a diagonal Gaussian built from
     self.mean / self.std / self.logstd, and a log(1 - policy**2 + EPS) term
     summed over the last axis is added.
     """
     standardized = (tf.atanh(x) - self.mean) / (self.std + self.EPS)
     quadratic = 0.5 * tf.reduce_sum(tf.square(standardized), axis=-1)
     num_dims = tf.cast(tf.shape(x)[-1], tf.float32)
     log_norm = 0.5 * np.log(2.0 * np.pi) * num_dims
     neglogp_likelihood = quadratic + log_norm \
         + tf.reduce_sum(self.logstd, axis=-1)

     # NOTE(review): the squashing term is evaluated on a fresh self.sample()
     # rather than on `x` -- confirm this is intended.
     policy = self.sample()
     squash_term = tf.reduce_sum(tf.log(1 - policy**2 + self.EPS), axis=-1)
     return neglogp_likelihood + squash_term
Exemplo n.º 8
0
def mlp_actor_critic(x, a, hidden_sizes=(400,300), activation=tf.nn.relu,
                     output_activation=None, policy=mlp_gaussian_policy, action_space=None):
    """Build the policy head and the value-function heads of the agent.

    The action `a` is divided by ``action_space.high[0]`` and passed through
    atanh before being handed to `policy` and `apply_squashing_func`. The
    resulting `mu` and `pi` are multiplied by the same scale afterwards.

    Value heads are MLPs with a single output unit, squeezed on axis 1, built
    in the variable scopes 'q1', 'q2', 'v', 'Q' and 'R'. The 'q1' and 'Q'
    scopes are re-entered with ``reuse=True`` to evaluate them on `pi`; 'q2'
    is re-entered with ``reuse=True`` for `q2_pi`.

    Returns:
        Tuple ``(mu, pi, logp_pi, q1, q2, q1_pi, q2_pi, v, Q, Q_pi, R)``.
    """
    action_scale = action_space.high[0]
    # NOTE: atanh is applied without clipping, so |a| == action_scale maps
    # to +-inf.
    a_unsqueeze = tf.atanh(a / action_scale)

    # policy
    with tf.variable_scope('pi'):
        mu, pi, logp_pi, logp_a = policy(x, a_unsqueeze, hidden_sizes, activation, output_activation)
        mu, pi, logp_pi, logp_a = apply_squashing_func(mu, pi, logp_pi, a_unsqueeze, logp_a)

    # make sure actions are in correct range
    mu = mu * action_scale
    pi = pi * action_scale

    layer_sizes = list(hidden_sizes) + [1]

    def value_head(inputs):
        return tf.squeeze(mlp(inputs, layer_sizes, activation, None), axis=1)

    def scoped_value(scope, parts, reuse=None):
        # Build (or reuse) one value head inside its own variable scope.
        with tf.variable_scope(scope, reuse=reuse):
            if len(parts) == 1:
                return value_head(parts[0])
            return value_head(tf.concat(parts, axis=-1))

    # vfs
    q1 = scoped_value('q1', [x, a])
    q1_pi = scoped_value('q1', [x, pi], reuse=True)
    q2 = scoped_value('q2', [x, a])
    q2_pi = scoped_value('q2', [x, pi], reuse=True)
    v = scoped_value('v', [x])
    Q = scoped_value('Q', [x, a])
    Q_pi = scoped_value('Q', [x, pi], reuse=True)
    R = scoped_value('R', [x])
    return mu, pi, logp_pi, q1, q2, q1_pi, q2_pi, v, Q, Q_pi, R
Exemplo n.º 9
0
  def testDiscretizedMixLogisticLoss(self):
    """Loss should equal the logistic bin likelihood of the first component."""
    batch, height, width, channels = 2, 4, 4, 3
    num_mixtures = 5
    spatial = [batch, height, width]
    param_shape = spatial + [num_mixtures * 3]

    # Assign all probability mass to the first mixture component.
    dominant_logit = tf.ones(spatial + [1]) * 1e8
    other_logits = tf.zeros(spatial + [num_mixtures - 1])
    logits = tf.concat([dominant_logit, other_logits], axis=-1)
    locs = tf.random_uniform(param_shape, minval=-.9, maxval=.9)
    log_scales = tf.random_uniform(param_shape, minval=-1., maxval=1.)
    coeffs = tf.atanh(tf.zeros(param_shape))
    pred = tf.concat([logits, locs, log_scales, coeffs], axis=-1)

    # Test labels that don't satisfy edge cases where 8-bit value is 0 or 255.
    labels = tf.random_uniform(spatial + [channels], minval=-.9, maxval=.9)

    # Reference loss: probability mass of the logistic inside a bin of
    # half-width 1/255 around each label, using the first component only.
    centered = labels - locs[..., :3]
    inv_stdv = tf.exp(-log_scales[..., :3])
    upper_cdf = tf.nn.sigmoid(inv_stdv * (centered + 1. / 255.))
    lower_cdf = tf.nn.sigmoid(inv_stdv * (centered - 1. / 255.))
    expected_loss = -tf.reduce_sum(tf.log(upper_cdf - lower_cdf), axis=-1)

    actual_loss = common_layers.discretized_mix_logistic_loss(
        pred=pred, labels=labels)
    actual_loss_val, expected_loss_val = self.evaluate(
        [actual_loss, expected_loss])
    self.assertAllClose(actual_loss_val, expected_loss_val, rtol=1e-5)
Exemplo n.º 10
0
  def testSampleFromDiscretizedMixLogistic(self):
    """Samples should match the clipped locations of the first component."""
    batch, height, width = 2, 4, 4
    num_mixtures = 5
    seed = 42
    leading_dims = [batch, height, width]
    per_mixture_shape = leading_dims + [num_mixtures * 3]

    # Assign all probability mass to the first mixture component.
    first_logit = tf.ones(leading_dims + [1]) * 1e8
    remaining_logits = tf.zeros(leading_dims + [num_mixtures - 1])
    logits = tf.concat([first_logit, remaining_logits], axis=-1)
    locs = tf.random_uniform(per_mixture_shape, minval=-.9, maxval=.9)
    log_scales = tf.ones(per_mixture_shape) * -1e8
    coeffs = tf.atanh(tf.zeros(per_mixture_shape))
    pred = tf.concat([logits, locs, log_scales, coeffs], axis=-1)

    expected_sample = tf.clip_by_value(locs[..., :3], -1., 1.)
    actual_sample = common_layers.sample_from_discretized_mix_logistic(
        pred, seed=seed)

    actual_sample_val, expected_sample_val = self.evaluate(
        [actual_sample, expected_sample])
    # Use a low tolerance: samples numerically differ, as the actual
    # implementation clips log-scales so they always contribute to sampling.
    self.assertAllClose(actual_sample_val, expected_sample_val, atol=1e-2)
Exemplo n.º 11
0
def soft_round_inverse(y, alpha, eps=1e-12):
    """Inverse of soft_round().

    This is described in Sec. 4.1. in the paper
    > "Universally Quantized Neural Compression"<br />
    > Eirikur Agustsson & Lucas Theis<br />
    > https://arxiv.org/abs/2006.09952

    Args:
      y: tf.Tensor. Inputs to this function.
      alpha: Float or tf.Tensor. Controls smoothness of the approximation.
      eps: Float. Threshold below which soft_round() is assumed to equal the
        identity function.

    Returns:
      tf.Tensor
    """
    # Shortcut for a plain Python number below the threshold: the result is
    # just the input.
    alpha_is_number = isinstance(alpha, (float, int))
    if alpha_is_number and alpha < eps:
        return tf.identity(y, name="soft_round_inverse")

    center = tf.floor(y) + 0.5
    scale = tf.tanh(alpha / 2.0) * 2.0
    # (y - center) lies in [-0.5, 0.5] and tanh in (-1, 1), so the product
    # lies in [-1, 1]. _clip_st (defined elsewhere) is relied on to keep the
    # atanh argument in a numerically stable range with a straight-through
    # gradient, as described by the original author.
    atanh_arg = _clip_st((y - center) * scale)
    offset = tf.atanh(atanh_arg) / tf.maximum(alpha, eps)

    # For very low alphas, soft_round behaves like identity
    return tf.where(alpha < eps, y, center + offset, name="soft_round_inverse")
Exemplo n.º 12
0
Arquivo: lbn.py Projeto: Nollde/LBN
 def eta(self):
     """
     Pseudorapidity.

     Computed as atanh(pz / p), with the ratio clipped to
     [epsilon - 1, 1 - epsilon] first.
     """
     ratio = self.pz() / self.p()
     lower = self.epsilon - 1
     upper = 1 - self.epsilon
     return tf.atanh(tf.clip_by_value(ratio, lower, upper))
Exemplo n.º 13
0
def cont_bern_mean(lam, l_lim=0.49, u_lim=0.51):
    """Continuous Bernoulli mean function in TensorFlow.

    The closed-form mean is used where ``lam < l_lim`` or ``lam > u_lim``;
    inside ``[l_lim, u_lim]`` a Taylor expansion around 0.5 is used instead.

    Args:
        lam: Tensor of distribution parameters.
        l_lim: Lower edge of the interval handled by the Taylor expansion.
        u_lim: Upper edge of the interval handled by the Taylor expansion.

    Returns:
        Tensor with the same shape as ``lam``.
    """
    outside = tf.logical_or(tf.less(lam, l_lim), tf.greater(lam, u_lim))

    # tf.where evaluates both branches, so entries inside the interval are
    # replaced by l_lim before the closed form is evaluated.
    safe_lam = tf.where(outside, lam, l_lim * tf.ones_like(lam))
    closed_form = safe_lam / (2.0 * safe_lam - 1.0) + 1.0 / (2.0 * tf.atanh(1.0 - 2.0 * safe_lam))

    centered = lam - 0.5
    taylor = 0.5 + centered / 3.0 + 16.0 / 45.0 * tf.pow(centered, 3)
    return tf.where(outside, closed_form, taylor)
Exemplo n.º 14
0
def h_log(c, x, input):
    """Compute 2 / (sqrt(c) * lambda) * atanh(sqrt(c) * |d|) * d / |d|.

    Here ``d = mobius_add(c, -x, input)``, ``|d|`` is its ``safe_norm`` over
    the last axis (kept as a dimension) and ``lambda = h_lambda(c, x)``.

    NOTE: the norm is not clipped, so atanh diverges as ``sqrt(c) * |d|``
    approaches 1. An earlier revision clipped the norm to
    ``(1 - 1e-3) / sqrt(c)`` but had that step disabled.
    """
    diff = mobius_add(c, -x, input)
    diff_norm = safe_norm(diff, axis=-1, keepdims=True)

    sqrt_c = np.sqrt(c)
    prefactor = 2. / (sqrt_c * h_lambda(c, x))
    return prefactor * tf.atanh(sqrt_c * diff_norm) * diff / diff_norm
Exemplo n.º 15
0
def apply_harmonic_bias(channels, num_layers):
    """Offset network outputs to ensure harmonic distribution of initial alpha.

    The first num_layers-1 channels are the ones that will become the alpha
    channels for layers [1, N-1]. (There is no channel corresponding to the
    alpha of the back layer because it is always 1.0, i.e. fully opaque.)

    These first num_layers-1 channels are adjusted so that instead of all
    layer alphas having an initial mean of 0.5, the Nth layer from the back
    has an initial mean of 1/N. This harmonic distribution allows each layer
    to contribute equal weight when the layers are composed.

    Args:
      channels: [..., N] Network output before final tanh activation.
      num_layers: How many layers we are predicting an MPI for.

    Returns:
      [..., N] Adjusted output.
    """
    # Denominators start at 2 because the back layer is not predicted (it is
    # always fully opaque).
    denominators = tf.range(2, num_layers + 1, dtype=tf.float32)
    target_alpha = 1.0 / denominators

    # Offset to apply before the activation and scaling.
    alpha_shift = tf.atanh(2.0 * target_alpha - 1.0)

    # The remaining channels receive a zero offset.
    num_unshifted = tf.shape(channels)[-1] - (num_layers - 1)
    zero_shift = tf.zeros([num_unshifted])
    return channels + tf.concat([alpha_shift, zero_shift], axis=-1)
Exemplo n.º 16
0
 def _inverse(self, y):
   """Return atanh(y), pulling in-domain inputs slightly inside (-1, 1).

   Values with |y| <= 1 are clipped to +-0.99999997 (per the original note,
   the largest magnitude for which atanh is valid in both tf.float32 and
   tf.float64). Values with |y| > 1 are passed to atanh unchanged.
   """
   in_domain = tf.less_equal(tf.abs(y), 1.)
   clipped = tf.clip_by_value(y, -0.99999997, 0.99999997)
   return tf.atanh(tf.where(in_domain, clipped, y))
Exemplo n.º 17
0
  def testDiscretizedMixLogisticLoss(self):
    """Loss should equal the logistic bin likelihood of the first component."""
    batch, height, width, channels = 2, 4, 4, 3
    num_mixtures = 5
    leading_dims = [batch, height, width]
    per_mixture_shape = leading_dims + [num_mixtures * 3]

    # Assign all probability mass to the first mixture component.
    first_logit = tf.ones(leading_dims + [1]) * 1e8
    remaining_logits = tf.zeros(leading_dims + [num_mixtures - 1])
    logits = tf.concat([first_logit, remaining_logits], axis=-1)
    locs = tf.random_uniform(per_mixture_shape, minval=-.9, maxval=.9)
    log_scales = tf.random_uniform(per_mixture_shape, minval=-1., maxval=1.)
    coeffs = tf.atanh(tf.zeros(per_mixture_shape))
    pred = tf.concat([logits, locs, log_scales, coeffs], axis=-1)

    # Test labels that don't satisfy edge cases where 8-bit value is 0 or 255.
    labels = tf.random_uniform(leading_dims + [channels],
                               minval=-.9, maxval=.9)

    # Reference loss: probability mass of the logistic inside a bin of
    # half-width 1/255 around each label, using the first component only.
    offset_from_loc = labels - locs[..., :3]
    inv_stdv = tf.exp(-log_scales[..., :3])
    cdf_above = tf.nn.sigmoid(inv_stdv * (offset_from_loc + 1. / 255.))
    cdf_below = tf.nn.sigmoid(inv_stdv * (offset_from_loc - 1. / 255.))
    expected_loss = -tf.reduce_sum(tf.log(cdf_above - cdf_below), axis=-1)

    actual_loss = common_layers.discretized_mix_logistic_loss(
        pred=pred, labels=labels)
    actual_loss_val, expected_loss_val = self.evaluate(
        [actual_loss, expected_loss])
    self.assertAllClose(actual_loss_val, expected_loss_val, rtol=1e-5)
Exemplo n.º 18
0
 def log_pis_for(self, actions):
     """Log-probability of `actions` under the policy distribution.

     When squashing is enabled, the actions are mapped back through atanh,
     scored by the underlying distribution, and the squash correction is
     subtracted. Otherwise the actions are scored directly.

     Args:
         actions: Tensor of actions (squashed if ``self._squash`` is set).

     Returns:
         Tensor of log-probabilities.
     """
     if self._squash:
         raw_actions = tf.atanh(actions)
         log_pis = self._distribution.log_prob(raw_actions)
         log_pis -= self._squash_correction(raw_actions)
         return log_pis
     # Without squashing the actions are already in the distribution's
     # domain. The original referenced `raw_actions` here, which is only
     # bound in the branch above and therefore raised a NameError.
     return self._distribution.log_prob(actions)
Exemplo n.º 19
0
def h_matmul(c, M, x):
    """Compute 1/sqrt(c) * tanh(|Mx| / |x| * atanh(sqrt(c) * |x|)) * Mx / |Mx|.

    ``Mx`` is ``tf.matmul(M, x)`` and ``|.|`` is ``safe_norm`` with its
    default arguments.
    """
    Mx = tf.matmul(M, x)

    sqrt_c = np.sqrt(c)
    mx_norm = safe_norm(Mx)
    x_norm = safe_norm(x)

    tanh_arg = mx_norm / x_norm * tf.atanh(sqrt_c * x_norm)
    return 1. / sqrt_c * tf.tanh(tanh_arg) * Mx / mx_norm
Exemplo n.º 20
0
 def _inverse(self, y):
     """Return atanh(y), computed in float32 and cast back to y's dtype.

     Inputs with |y| <= 1 are clipped to +-0.99999997 before atanh; inputs
     with |y| > 1 are passed to atanh unchanged.
     """
     original_dtype = y.dtype
     y32 = tf.cast(y, tf.float32)
     in_domain = tf.less_equal(tf.abs(y32), 1.)
     bounded = tf.where(in_domain,
                        tf.clip_by_value(y32, -0.99999997, 0.99999997), y32)
     return tf.cast(tf.atanh(bounded), original_dtype)
 def logpac(self, action):
     """Log-probability of a squashed `action` under the current policy.

     The action is clipped to [-0.99, 0.99], mapped through atanh and scored
     with `gaussian_likelihood` using the policy's `act_mu` and log of `std`.
     The sum over axis 1 of log(1 - action**2 + EPS) is then subtracted.
     """
     from stable_baselines.sac.policies import gaussian_likelihood, EPS
     act_mu = self.policy_tf.act_mu
     log_std = tf.log(self.policy_tf.std)
     # Potentially we need to clip atanh and pass gradient
     pre_tanh_action = tf.atanh(tf.clip_by_value(action, -0.99, 0.99))
     log_u = gaussian_likelihood(pre_tanh_action, act_mu, log_std)
     squash_term = tf.reduce_sum(tf.log(1 - action**2 + EPS), axis=1)
     return log_u - squash_term
Exemplo n.º 22
0
 def neglogp(self, x):
     """Negative log-probability of `x` under a diagonal Gaussian.

     With ``self.squash`` set, `x` is first mapped through atanh and the sum
     over the last axis of log(1 - x**2 + 1e-6) is added to the result.
     """
     pre_squash = tf.atanh(x) if self.squash else x
     quadratic = 0.5 * tf.reduce_sum(tf.square((pre_squash - self.mean) / self.std), axis=-1)
     log_norm = 0.5 * np.log(2.0 * np.pi) * tf.cast(tf.shape(x)[-1], tf.float32)
     gaussian_term = quadratic + log_norm + tf.reduce_sum(self.logstd, axis=-1)
     if not self.squash:
         return gaussian_term
     return gaussian_term + tf.reduce_sum(tf.log(1-x**2+1e-6), axis=-1)
Exemplo n.º 23
0
 def unsquash_action(mu, pi, log_std):
     """
     Log-likelihood of a squashed action.

     `pi` (in [-1, 1]) is mapped back to [-inf, inf] with atanh and scored with
     Policy.gaussian_likelihood(mu, log_std). The sum over axis 1 (dimension
     kept) of log(clip(1 - pi**2, 0, 1) + 1e-6) is subtracted.
     """
     pre_tanh = tf.atanh(pi)
     gaussian_logp = Policy.gaussian_likelihood(pre_tanh, mu, log_std)
     one_minus_sq = Policy.clip_but_pass_gradient(1 - pi**2, l=0, h=1)
     correction = tf.reduce_sum(tf.math.log(one_minus_sq + 1e-6), axis=1, keepdims=True)
     return gaussian_logp - correction
Exemplo n.º 24
0
def cont_bern_log_norm(lam, l_lim=0.49, u_lim=0.51):
    """Log normalizing constant of a continuous Bernoulli distribution.

    Returns the closed-form log normalizing constant where ``lam < l_lim`` or
    ``lam > u_lim`` and a Taylor approximation around 0.5 inside
    ``[l_lim, u_lim]``.

    Args:
        lam: Tensor of distribution parameters.
        l_lim: Lower edge of the interval handled by the Taylor expansion.
        u_lim: Upper edge of the interval handled by the Taylor expansion.

    Returns:
        Tensor with the same shape as ``lam``.
    """
    outside = tf.logical_or(tf.less(lam, l_lim), tf.greater(lam, u_lim))

    # safe_lam might appear useless, but tf.where evaluates both options
    # regardless of the condition, so the closed form must never be evaluated
    # near 0.5.
    safe_lam = tf.where(outside, lam, l_lim * tf.ones_like(lam))
    closed_form = tf.log(tf.abs(2.0 * tf.atanh(1 - 2.0 * safe_lam))) - tf.log(tf.abs(1 - 2.0 * safe_lam))

    centered = lam - 0.5
    taylor = tf.log(2.0) + 4.0 / 3.0 * tf.pow(centered, 2) + 104.0 / 45.0 * tf.pow(centered, 4)
    return tf.where(outside, closed_form, taylor)
Exemplo n.º 25
0
def tf_my_mob_mat_distance(mat_x, mat_y):
    """Return 2 * atanh(||(-mat_x) (+) mat_y||), norm taken over axis 2.

    ``(+)`` is ``tf_my_mob_mat_addition``. The norm is clipped to
    ``[1e-8, clip_value]`` (module-level constant) before atanh.

    NOTE(review): the original comment gave the input shape as
    [features, nodes], but the norm is taken over axis 2 -- confirm the
    expected rank of the inputs.
    """
    diff = tf_my_mob_mat_addition(-mat_x, mat_y)
    diff_norm = tf.norm(diff, axis=2)
    bounded_norm = tf.clip_by_value(diff_norm,
                                    clip_value_min=1e-8,
                                    clip_value_max=clip_value)
    return 2. * tf.atanh(bounded_norm)
Exemplo n.º 26
0
 def log_prob(self, value, **kwargs):
     """Log-probability of `value`, summed over the last axis.

     With ``self.squash`` set, `value` is mapped through atanh, scored by the
     parent class, and the sum over axis 1 of log(1 - value**2 + EPSILON) is
     subtracted (from the SAC paper: https://arxiv.org/pdf/1801.01290.pdf).

     NOTE(review): the correction is already reduced over axis 1 before it is
     subtracted, and the result is reduced again over the last axis -- confirm
     the shape returned by the parent's log_prob makes this intended.
     """
     if not self.squash:
         per_element = super().log_prob(value, **kwargs)
     else:
         pre_tanh = tf.atanh(value)
         correction = tf.reduce_sum(tf.log(1 - value ** 2 + EPSILON), axis=1)
         per_element = super().log_prob(pre_tanh, **kwargs) - correction
     return tf.reduce_sum(per_element, axis=-1)
def AdaIN_adv_tanh(content,  epsilon=1e-5):
    """Re-normalize `content` with learnable, tanh-bounded statistics.

    The mean and standard deviation of `content` over axes [1, 2] are
    computed. Two variables ("scale/sigma_S" and "scale/mean_S", zero
    initialized) are passed through tanh to pick a new standard deviation in
    [sigma / p, sigma * p] and a new mean magnitude in [|mean| / p, |mean| * p]
    with p = sqrt(1.5); the new mean keeps the sign of the original mean.

    Returns:
        Tuple ``(output, ops_asgn, ops_bound, sigmaSp, meanSp, meanS, sigmaS)``
        where ``output`` is the re-normalized content, ``ops_asgn`` assigns
        the two variables from the current content statistics via atanh, and
        ``ops_bound`` is an empty list.
    """
    meanC, varC = tf.nn.moments(content, [1, 2], keep_dims=True)
    batch_size = settings.config["BATCH_SIZE"]
    num_channels = content.shape.as_list()[3]
    stat_shape = [batch_size, 1, 1, num_channels]
    with tf.variable_scope("scale"):
        sigmaS = tf.get_variable("sigma_S", shape=stat_shape,
                                 initializer=tf.zeros_initializer())
        meanS = tf.get_variable("mean_S", shape=stat_shape,
                                initializer=tf.zeros_initializer())

    sigmaC = tf.sqrt(tf.add(varC, epsilon))
    p = tf.sqrt(1.5)

    sign = tf.sign(meanC)
    abs_meanC = tf.abs(meanC)

    # Each allowed interval [v / p, v * p] is described by its midpoint and
    # its half-width.
    sigma_low, sigma_high = sigmaC / p, sigmaC * p
    sigma_mid = (sigma_low + sigma_high) / 2.0
    sigma_half_width = (sigma_high - sigma_low) / 2.0

    mean_low, mean_high = abs_meanC / p, abs_meanC * p
    mean_mid = (mean_low + mean_high) / 2.0
    mean_half_width = (mean_high - mean_low) / 2.0

    sigmaSp = sigma_half_width * tf.nn.tanh(sigmaS) + sigma_mid
    meanSp = sign * (mean_half_width * tf.nn.tanh(meanS) + mean_mid)

    ops_bound = []

    # Assignments that set the variables from the current content statistics;
    # the 1e-4 keeps the denominator away from zero.
    assign_sigma = tf.assign(
        sigmaS, tf.atanh((sigmaC - sigma_mid) / (sigma_half_width + 1e-4)))
    assign_mean = tf.assign(
        meanS, tf.atanh((abs_meanC - mean_mid) / (mean_half_width + 1e-4)))
    ops_asgn = [assign_sigma, assign_mean]

    output = (content - meanC) * sigmaSp / sigmaC + meanSp
    return output, ops_asgn, ops_bound, sigmaSp, meanSp, meanS, sigmaS
Exemplo n.º 28
0
Arquivo: klac.py Projeto: jypark/BOPAH
 def _build_baseline_policy_and_kl(self, target_dist, obs_input, action_input):
     """Create the behavior policy, its training op and the KL to it.

     A new `Actor` is stored in ``self.behavior_policy`` and trained with Adam
     to maximize the likelihood of `action_input` (clipped, mapped through
     atanh, with the log(1 - a**2 + EPS) term subtracted). The optimizer's
     variables are initialized in ``self.sess``.

     Returns:
         Tuple ``(kl, train_op, loss)`` where ``kl`` is
         ``target_dist.kl_divergence(behavior_dist)`` with a trailing axis
         added.
     """
     EPS = 1e-6
     self.behavior_policy = Actor(self.action_dim, self.max_action, hidden_dim=self.hidden_dim)
     _, _, behavior_dist = self.behavior_policy([obs_input])

     # Likelihood of the observed actions in pre-tanh space.
     clipped_action = tf.clip_by_value(action_input, -1 + EPS, 1 - EPS)
     pre_tanh_action = tf.atanh(clipped_action)
     pre_tanh_logp = behavior_dist.log_prob(pre_tanh_action)
     squash_term = tf.reduce_sum(tf.log(1 - action_input ** 2 + EPS), axis=1)
     behavior_loss = -tf.reduce_mean(pre_tanh_logp - squash_term)

     behavior_optimizer = tf.train.AdamOptimizer(self.learning_rate)
     behavior_train_op = behavior_optimizer.minimize(
         behavior_loss, var_list=self.behavior_policy.trainable_variables)
     self.sess.run(tf.variables_initializer(behavior_optimizer.variables()))

     kl_to_behavior = target_dist.kl_divergence(behavior_dist)[:, None]
     return kl_to_behavior, behavior_train_op, behavior_loss
Exemplo n.º 29
0
    def _define_ops(self):
        """Build the loss, update step and output of the attack.

        The adversarial image is parameterized as ``0.5 * (tanh(w) + 1)``.
        The loss is the squared distance to ``self.X_var`` (summed over axes
        [1, 2, 3]) plus ``self.c`` times a logit-margin term floored at
        ``-self.k``. One step performs gradient descent on ``w`` and decays
        ``self.lr``.
        """
        super()._define_ops()

        # Loss to be optimized by attacker.
        self.loss: tf.Tensor = None

        # A single step of the attack. Will be run in order.
        self.step: List[tf.Tensor] = None

        # The output perturbed image.
        self.output: tf.Tensor = None

        w = tf.Variable(tf.zeros(self.batch_shape), name="w")
        adv_images = 0.5 * (tf.tanh(w) + 1)

        logits = self.model.logits(adv_images)

        # Initialize w from the input image; the 1.9 factor keeps the atanh
        # argument within (-0.95, 0.95) for inputs in [0, 1].
        init_w = tf.assign(w, tf.atanh(1.9*(self.X_var-0.5)))
        self.init_inputs.append(init_w)

        distortion = tf.reduce_sum(tf.square(adv_images - self.X_var), [1, 2, 3])

        untargeted = self.target is None
        if untargeted:
            class_onehot = self.Yi
        else:
            class_onehot = tf.one_hot(
                np.repeat(self.target, self.batch_size),
                self.model.num_classes
            )
        others_score = tf.reduce_max((1 - class_onehot) * logits, axis=1)
        class_score = tf.reduce_sum(class_onehot * logits, axis=1)

        # Untargeted: push the labelled class below the best other class.
        # Targeted: push the target class above the best other class.
        if untargeted:
            margin = tf.maximum(class_score - others_score, -self.k)
        else:
            margin = tf.maximum(others_score - class_score, -self.k)

        self.loss = distortion + self.c * margin

        grad = tf.gradients(ys=self.loss, xs=w)[0]
        descend_w = tf.assign(w, w - self.lr * grad)
        decay_lr = tf.assign(self.lr, self.lr * self.learning_rate_decay)
        self.step = [descend_w, decay_lr]

        self.output = tf.clip_by_value(adv_images, 0, 1)
Exemplo n.º 30
0
def tf_my_prod_mat_log_map_zero(M, c):
    """Scale `M` by atanh(sqrt(c) * ||M||) / (sqrt(c) * ||M||) along axis 0.

    `M` is first shifted by the module-level ``EPS`` and norm-clipped to
    ``clip_value`` along axis 0. The atanh argument is additionally clipped
    to [-0.9, 0.9].
    """
    sqrt_c = tf.sqrt(c)
    shifted = M + EPS
    clipped = tf.clip_by_norm(shifted, clip_norm=clip_value, axes=0)
    col_norm = tf.norm(clipped, axis=0)
    atanh_arg = tf.clip_by_value(col_norm * sqrt_c,
                                 clip_value_min=-0.9,
                                 clip_value_max=0.9)
    coefficient = tf.atanh(atanh_arg) / col_norm / sqrt_c
    return clipped * coefficient
Exemplo n.º 31
0
    def _create_q_update(self):
        """Create a minimization operation for Q-function update.

        Builds the Bellman residual between ``self.joint_qf`` evaluated on the
        current placeholders and a target computed from
        ``self.target_joint_qf`` on the next observations. The residual is
        stored in ``self._bellman_residual``; when ``self._train_qf`` is set,
        an Adam training op is appended to ``self._training_ops``.
        """
        # Opponent actions (and their log-probabilities) for the next
        # observations.
        opponent_actions, opponent_actions_log_pis = self.opponent_policy.actions_for(
            observations=self._next_observations_ph,
            reuse=tf.AUTO_REUSE,
            with_log_pis=True)
        assert_shape(opponent_actions, [None, self._opponent_action_dim])

        # Log-probability of those opponent actions under the prior, scored in
        # pre-squash space.
        # NOTE(review): atanh is applied without clipping -- assumes
        # opponent_actions lie strictly inside (-1, 1); confirm.
        prior = self._get_opponent_prior(self._next_observations_ph)
        raw_actions = tf.atanh(opponent_actions)
        prior_log_pis = prior.dist.log_prob(raw_actions)
        prior_log_pis = prior_log_pis - squash_correction(raw_actions)

        # Own actions for the next observations, conditioned on the sampled
        # opponent actions.
        actions, actions_log_pis = self.policy.actions_for(
            observations=self._next_observations_ph,
            reuse=tf.AUTO_REUSE,
            with_log_pis=True,
            opponent_actions=opponent_actions)

        with tf.variable_scope('target_joint_q_agent_{}'.format(
                self._agent_id),
                               reuse=tf.AUTO_REUSE):
            q_value_targets = self.target_joint_qf.output_for(
                observations=self._next_observations_ph,
                actions=actions,
                opponent_actions=opponent_actions)
            # Target value: target Q minus the (annealed) own-policy log-prob
            # and the opponent log-prob, plus the prior log-prob.
            q_value_targets = q_value_targets - self._annealing_pl * actions_log_pis - opponent_actions_log_pis + prior_log_pis
            assert_shape(q_value_targets, [None])

        # Q-values for the transitions fed through the placeholders.
        self._q_values = self.joint_qf.output_for(self._observations_ph,
                                                  self._actions_pl,
                                                  self._opponent_actions_pl,
                                                  reuse=True)
        assert_shape(self._q_values, [None])

        # Bellman target; no gradient flows through it.
        ys = tf.stop_gradient(self._reward_scale * self._rewards_pl +
                              (1 - self._terminals_pl) * self._discount *
                              q_value_targets)
        assert_shape(ys, [None])

        bellman_residual = 0.5 * tf.reduce_mean((ys - self._q_values)**2)
        with tf.variable_scope('target_joint_qf_opt_agent_{}'.format(
                self._agent_id),
                               reuse=tf.AUTO_REUSE):
            if self._train_qf:
                # Only the joint Q-function's own parameters are updated.
                td_train_op = tf.train.AdamOptimizer(self._qf_lr).minimize(
                    loss=bellman_residual,
                    var_list=self.joint_qf.get_params_internal())
                self._training_ops.append(td_train_op)

        self._bellman_residual = bellman_residual
Exemplo n.º 32
0
    def __init__(self,
                 grid_height,
                 grid_width,
                 target_control_points,
                 input_shape,
                 bounded=True,
                 **kwargs):
        '''
        tps_localizer will generate the source_control_point in the input images.

        Input
        --------
        grid_height -- The y dimension of the target_control_points
        grid_width -- The x dimension of the target_control_points
        target_control_points -- [x,y] of shape (N,2)
        input_shape -- The image 2D size of shape (H,W)
        bounded  -- If the grid extent is bounded from -1 to 1 or not
        '''
        super(tps_localizer, self).__init__(**kwargs)

        # NOTE(review): tf.shape() returns a tensor, so this assert and the
        # `units=self.output_dim` below only behave like plain integers when
        # running eagerly -- confirm the intended execution mode.
        assert tf.shape(target_control_points)[0] == grid_width * grid_height
        self.output_dim = tf.shape(target_control_points)[0]

        # Fixed: the original passed the undefined name `input_dims` here;
        # the constructor argument is `input_shape`.
        # NOTE(review): Conv2D's input_shape normally includes the channel
        # dimension, while the docstring describes (H, W) -- confirm.
        self.layer1 = tf.keras.layers.Conv2D(filters=10,
                                             kernel_size=5,
                                             input_shape=input_shape)
        self.layer2 = tf.keras.layers.MaxPool2D(pool_size=2)
        # Fixed: `tf.keras.layers.Activation` has no `ReLU` attribute; the
        # layer class is `tf.keras.layers.ReLU`.
        self.layer3 = tf.keras.layers.ReLU()
        self.layer4 = tf.keras.layers.Conv2D(filters=20, kernel_size=5)
        self.layer5 = tf.keras.layers.SpatialDropout2D(rate=0.5)
        self.layer6 = tf.keras.layers.MaxPool2D(pool_size=2)
        self.layer7 = tf.keras.layers.ReLU()
        self.layer8 = tf.keras.layers.Flatten()
        self.layer9 = tf.keras.layers.Dense(units=50, activation='relu')
        self.layer10 = tf.keras.layers.Dropout(rate=0.5)

        # The last layer starts with zero weights, so its initial output is
        # determined by the bias alone: tanh(atanh(target)) when bounded,
        # or the target itself when unbounded.
        if bounded:
            self.layer11 = tf.keras.layers.Dense(
                units=self.output_dim,
                activation='tanh',
                kernel_initializer=tf.keras.initializers.Zeros(),
                bias_initializer=tf.keras.initializers.Constant(
                    tf.atanh(target_control_points), dtype="float32"))
        else:
            self.layer11 = tf.keras.layers.Dense(
                units=self.output_dim,
                activation='linear',
                kernel_initializer=tf.keras.initializers.Zeros(),
                bias_initializer=tf.keras.initializers.Constant(
                    target_control_points, dtype="float32"))
Exemplo n.º 33
0
 def _inverse(self, y):
   """Return the inverse hyperbolic tangent of `y`; no clipping is applied."""
   pre_tanh = tf.atanh(y)
   return pre_tanh
def adv_train_arctan_net(input_images, clip_norm=1.5):
    """Build a small conv/deconv network that perturbs images in atanh space.

    The images are divided by 255, mapped to (-1, 1), passed through atanh and
    normalized. A two-layer convolutional encoder and two transposed
    convolutions produce a mask that is added to the normalized input; the sum
    goes through tanh. The difference to the scaled input is norm-clipped to
    `clip_norm`, added back and clipped to [0, 1].

    Args:
        input_images: Image batch with values in [0, 255]. The output is
            reshaped to (128, 32, 32, 3), so that batch layout is assumed.
        clip_norm: Maximum norm of the perturbation before the final clip.

    Returns:
        Tuple ``(loss, output_images)``: the mean squared perturbation and the
        adversarial images rescaled to [0, 255].
    """
    with tf.variable_scope('adv_encoder'):
        width = 32
        height = 32
        batch_size = 128

        input_images = input_images/255

        # The 0.999999 factor keeps the atanh argument strictly inside (-1, 1).
        atanh_images = tf.atanh(((input_images*2)-1)*0.999999)

        # NOTE(review): the centered values are divided by the variance, not
        # the standard deviation -- confirm this is intended.
        reduce_axes = tuple(range(1,len(input_images.shape)))
        mean, var = tf.nn.moments(atanh_images, axes=reduce_axes, keep_dims=True)
        normed_input_images = (atanh_images-mean)/var

        # Encoder: two 5x5 convolutions, each followed by 3x3/stride-2 pooling.
        conv1 = tf.layers.conv2d(inputs=normed_input_images,
                                 filters=64,
                                 kernel_size=(5, 5),
                                 activation=tf.nn.leaky_relu,
                                 padding='SAME',
                                 name='adv_conv1')
        pooled1 = tf.layers.max_pooling2d(conv1, (3,3), (2,2), 'SAME')

        conv2 = tf.layers.conv2d(inputs=pooled1,
                                 filters=128,
                                 kernel_size=(5, 5),
                                 activation=tf.nn.leaky_relu,
                                 padding='SAME',
                                 name='adv_conv2')
        pooled2 = tf.layers.max_pooling2d(conv2, (3,3), (2,2), 'SAME')

        # Decoder: two stride-2 transposed convolutions; the last one has no
        # activation.
        deconv1 = tf.layers.conv2d_transpose(pooled2, 64, (5,5), (2,2), 'SAME',
                                             activation=tf.nn.leaky_relu,
                                             name='adv_deconv1')
        adv_mask = tf.layers.conv2d_transpose(deconv1, 3, (5,5), (2,2), 'SAME',
                                              name='adv_deconv2')

        # NOTE(review): tanh yields values in (-1, 1) while input_images is in
        # [0, 1] at this point -- confirm the ranges are meant to differ.
        squashed_adv_images = tf.tanh(adv_mask + normed_input_images)
        raw_perturbation = squashed_adv_images - input_images
        bounded_perturbation = tf.clip_by_norm(raw_perturbation, clip_norm)
        adv_images = tf.clip_by_value(bounded_perturbation+input_images,0,1)
        output_images = tf.reshape(adv_images, (batch_size, height, width, 3)) * 255.0

        perturbation = adv_images - input_images

        # Display the training images in the visualizer.
        tf.summary.image('adv_images', output_images)

        # Reconstruction L2 loss
        per_image_mse = tf.reduce_mean(
            tf.square(perturbation),
            axis=list(range(1,len(perturbation.shape))))
        loss = tf.reduce_mean(per_image_mse, name='dis_loss')

    return loss, output_images