def artanh(x):
    # Clip inputs away from +-1 so tf.atanh stays finite; the margin is chosen
    # per dtype (float32 needs a wider margin than float64).
    if x.dtype == tf.float32:
        result = tf.atanh(tf.clip_by_value(x, -1.0 + 1e-7, 1.0 - 1e-7))
    elif x.dtype == tf.float64:
        result = tf.atanh(tf.clip_by_value(x, -1.0 + 1e-16, 1.0 - 1e-16))
    else:
        raise ValueError('invalid dtype!')
    return result
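A quick sanity check of the clipping above (my own sketch, assuming TF 2.x eager execution): without the clip, atanh diverges at +-1.

import tensorflow as tf

x = tf.constant([-1.0, -0.5, 0.5, 1.0], dtype=tf.float32)
print(tf.atanh(x).numpy())   # [-inf, -0.5493, 0.5493, inf]
print(artanh(x).numpy())     # finite everywhere: +-1 is clipped to +-(1 - 1e-7), giving ~+-8.4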
def pair_dy(self):
    """Rapidity difference between all pairs of particles."""
    # dy = y1 - y2 = atanh(beta1) - atanh(beta2)
    beta = tf.clip_by_value(self.beta(), self.epsilon, 1 - self.epsilon)
    dy = (tf.atanh(tf.expand_dims(beta, axis=-1))
          - tf.atanh(tf.expand_dims(beta, axis=-2)))
    # Return only the upper triangle, without the diagonal.
    return tf.gather(tf.reshape(dy, [-1, self.n**2]), self.triu_indices, axis=1)
def soft_round_inverse(y, alpha, eps=1e-3):
    """Inverse of soft_round().

    This is described in Sec. 4.1. in the paper
    > "Universally Quantized Neural Compression"<br />
    > Eirikur Agustsson & Lucas Theis<br />
    > https://arxiv.org/abs/2006.09952

    Args:
      y: tf.Tensor. Inputs to this function.
      alpha: Float or tf.Tensor. Controls smoothness of the approximation.
      eps: Float. Threshold below which soft_round() is assumed to equal the
        identity function.

    Returns:
      tf.Tensor
    """
    # This guards the gradient of tf.where below against NaNs, while maintaining
    # correctness, as for alpha < eps the result is ignored.
    alpha_bounded = tf.maximum(alpha, eps)
    m = tf.floor(y) + .5
    s = (y - m) * (tf.tanh(alpha_bounded / 2.) * 2.)
    r = tf.atanh(s) / alpha_bounded
    # `r` must be between -.5 and .5 by definition. In case atanh becomes +-inf
    # due to numerical instability, this prevents the forward pass from yielding
    # infinite values. Note that it doesn't prevent the backward pass from
    # returning non-finite values.
    r = tf.clip_by_value(r, -.5, .5)
    # For very low alphas, soft_round behaves like identity.
    return tf.where(alpha < eps, y, m + r, name="soft_round_inverse")
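For context, here is a minimal soft_round() sketch consistent with the inverse above (reconstructed from Sec. 4.1 of the paper, not copied from the library), together with a round-trip check:

import tensorflow as tf

def soft_round(x, alpha, eps=1e-3):
    # s(x) = m + tanh(alpha * r) / (2 * tanh(alpha / 2)), with m = floor(x) + .5
    alpha_bounded = tf.maximum(alpha, eps)
    m = tf.floor(x) + .5
    r = x - m
    z = tf.tanh(alpha_bounded / 2.) * 2.
    y = m + tf.tanh(alpha_bounded * r) / z
    return tf.where(alpha < eps, x, y)

x = tf.linspace(-2.0, 2.0, 9)
y = soft_round(x, alpha=5.0)
x_rec = soft_round_inverse(y, alpha=5.0)
print(tf.reduce_max(tf.abs(x - x_rec)).numpy())  # ~0, up to float error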
def nlogp(self, dist, action):
    """Negative log-probability of a tanh-squashed action."""
    before_squashed_action = tf.atanh(
        tf.clip_by_value(action, -1 + EPS, 1 - EPS))
    log_likelihood = dist.log_prob(before_squashed_action)
    # Change-of-variables correction for the tanh squashing.
    log_likelihood -= tf.reduce_sum(tf.log(1 - action**2 + EPS), axis=1)
    return -tf.reduce_mean(log_likelihood)
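The correction used here is the standard tanh change of variables. As a standalone cross-check (my sketch, assuming TensorFlow Probability is installed), the hand-rolled correction matches a tfp tanh-transformed Gaussian:

import tensorflow as tf
import tensorflow_probability as tfp
tfd, tfb = tfp.distributions, tfp.bijectors

base = tfd.Normal(loc=[0.3, -0.1], scale=[0.8, 0.5])
squashed = tfd.TransformedDistribution(base, tfb.Tanh())

u = base.sample(seed=0)
a = tf.tanh(u)
# Hand-rolled: log N(u) - sum log(1 - a^2)
by_hand = tf.reduce_sum(base.log_prob(u) - tf.math.log(1.0 - a**2))
print(by_hand.numpy(), tf.reduce_sum(squashed.log_prob(a)).numpy())  # ~equal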
def testSampleFromDiscretizedMixLogistic(self):
    batch = 2
    height = 4
    width = 4
    num_mixtures = 5
    seed = 42
    # Assign all probability mass to the first mixture component.
    logits = tf.concat(
        [tf.ones([batch, height, width, 1]) * 1e8,
         tf.zeros([batch, height, width, num_mixtures - 1])],
        axis=-1)
    locs = tf.random_uniform([batch, height, width, num_mixtures * 3],
                             minval=-.9, maxval=.9)
    log_scales = tf.ones([batch, height, width, num_mixtures * 3]) * -1e8
    coeffs = tf.atanh(tf.zeros([batch, height, width, num_mixtures * 3]))
    pred = tf.concat([logits, locs, log_scales, coeffs], axis=-1)

    locs_0 = locs[..., :3]
    expected_sample = tf.clip_by_value(locs_0, -1., 1.)
    actual_sample = common_layers.sample_from_discretized_mix_logistic(
        pred, seed=seed)
    actual_sample_val, expected_sample_val = self.evaluate(
        [actual_sample, expected_sample])
    # Use a low tolerance: samples numerically differ, as the actual
    # implementation clips log-scales so they always contribute to sampling.
    self.assertAllClose(actual_sample_val, expected_sample_val, atol=1e-2)
def _graph_fn_unsquash(self, values):
    if get_backend() == "tf":
        return tf.atanh((values - self.low) / (self.high - self.low) * 2.0 - 1.0)
    elif get_backend() == "pytorch":
        return torch.atanh((values - self.low) / (self.high - self.low) * 2.0 - 1.0)
def neglogp(self, x):
    # Negative log-likelihood of the pre-squash Gaussian, evaluated at atanh(x).
    neglogp_likelihood = (
        0.5 * tf.reduce_sum(
            tf.square((tf.atanh(x) - self.mean) / (self.std + self.EPS)),
            axis=-1)
        + 0.5 * np.log(2.0 * np.pi) * tf.cast(tf.shape(x)[-1], tf.float32)
        + tf.reduce_sum(self.logstd, axis=-1))
    # Tanh change-of-variables correction; this must be evaluated at x itself
    # (the original evaluated it at a fresh sample, which is incorrect for
    # computing neglogp(x)).
    return neglogp_likelihood + tf.reduce_sum(
        tf.log(1 - x**2 + self.EPS), axis=-1)
def mlp_actor_critic(x, a, hidden_sizes=(400, 300), activation=tf.nn.relu,
                     output_activation=None, policy=mlp_gaussian_policy,
                     action_space=None):
    action_scale = action_space.high[0]
    # Undo the scaling and the tanh squashing to recover the raw action.
    a_unsquashed = tf.atanh(a / action_scale)

    # policy
    with tf.variable_scope('pi'):
        mu, pi, logp_pi, logp_a = policy(x, a_unsquashed, hidden_sizes,
                                         activation, output_activation)
        mu, pi, logp_pi, logp_a = apply_squashing_func(mu, pi, logp_pi,
                                                       a_unsquashed, logp_a)

    # make sure actions are in correct range
    mu *= action_scale
    pi *= action_scale

    # vfs
    vf_mlp = lambda x: tf.squeeze(
        mlp(x, list(hidden_sizes) + [1], activation, None), axis=1)
    with tf.variable_scope('q1'):
        q1 = vf_mlp(tf.concat([x, a], axis=-1))
    with tf.variable_scope('q1', reuse=True):
        q1_pi = vf_mlp(tf.concat([x, pi], axis=-1))
    with tf.variable_scope('q2'):
        q2 = vf_mlp(tf.concat([x, a], axis=-1))
    with tf.variable_scope('q2', reuse=True):
        q2_pi = vf_mlp(tf.concat([x, pi], axis=-1))
    with tf.variable_scope('v'):
        v = vf_mlp(x)
    with tf.variable_scope('Q'):
        Q = vf_mlp(tf.concat([x, a], axis=-1))
    with tf.variable_scope('Q', reuse=True):
        Q_pi = vf_mlp(tf.concat([x, pi], axis=-1))
    with tf.variable_scope('R'):
        R = vf_mlp(x)
    return mu, pi, logp_pi, q1, q2, q1_pi, q2_pi, v, Q, Q_pi, R
def testDiscretizedMixLogisticLoss(self):
    batch = 2
    height = 4
    width = 4
    channels = 3
    num_mixtures = 5
    # Assign all probability mass to the first mixture component.
    logits = tf.concat(
        [tf.ones([batch, height, width, 1]) * 1e8,
         tf.zeros([batch, height, width, num_mixtures - 1])],
        axis=-1)
    locs = tf.random_uniform([batch, height, width, num_mixtures * 3],
                             minval=-.9, maxval=.9)
    log_scales = tf.random_uniform([batch, height, width, num_mixtures * 3],
                                   minval=-1., maxval=1.)
    coeffs = tf.atanh(tf.zeros([batch, height, width, num_mixtures * 3]))
    pred = tf.concat([logits, locs, log_scales, coeffs], axis=-1)

    # Test labels that don't satisfy edge cases where the 8-bit value is 0 or 255.
    labels = tf.random_uniform([batch, height, width, channels],
                               minval=-.9, maxval=.9)
    locs_0 = locs[..., :3]
    log_scales_0 = log_scales[..., :3]

    centered_labels = labels - locs_0
    inv_stdv = tf.exp(-log_scales_0)
    plus_in = inv_stdv * (centered_labels + 1. / 255.)
    min_in = inv_stdv * (centered_labels - 1. / 255.)
    cdf_plus = tf.nn.sigmoid(plus_in)
    cdf_min = tf.nn.sigmoid(min_in)
    expected_loss = -tf.reduce_sum(tf.log(cdf_plus - cdf_min), axis=-1)

    actual_loss = common_layers.discretized_mix_logistic_loss(
        pred=pred, labels=labels)
    actual_loss_val, expected_loss_val = self.evaluate(
        [actual_loss, expected_loss])
    self.assertAllClose(actual_loss_val, expected_loss_val, rtol=1e-5)
def soft_round_inverse(y, alpha, eps=1e-12):
    """Inverse of soft_round().

    This is described in Sec. 4.1. in the paper
    > "Universally Quantized Neural Compression"<br />
    > Eirikur Agustsson & Lucas Theis<br />
    > https://arxiv.org/abs/2006.09952

    Args:
      y: tf.Tensor. Inputs to this function.
      alpha: Float or tf.Tensor. Controls smoothness of the approximation.
      eps: Float. Threshold below which soft_round() is assumed to equal the
        identity function.

    Returns:
      tf.Tensor
    """
    if isinstance(alpha, (float, int)) and alpha < eps:
        return tf.identity(y, name="soft_round_inverse")
    m = tf.floor(y) + 0.5
    s = (y - m) * (tf.tanh(alpha / 2.0) * 2.0)
    # We have -0.5 <= (y - m) <= 0.5 and -1 < tanh < 1, so -1 <= s <= 1.
    # However tf.atanh is only stable for inputs in the range [-1+1e-7, 1-1e-7],
    # so we (safely) clip s to this range. In the rare case where
    # `1 - |s| < 1e-7`, we use straight-through for the gradient.
    s = _clip_st(s)
    r = tf.atanh(s) / tf.maximum(alpha, eps)
    # For very low alphas, soft_round behaves like identity.
    return tf.where(alpha < eps, y, m + r, name="soft_round_inverse")
def eta(self):
    """Pseudorapidity."""
    return tf.atanh(
        tf.clip_by_value(self.pz() / self.p(), self.epsilon - 1, 1 - self.epsilon))
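For reference (not part of the original class), atanh(pz/p) is equivalent to the textbook pseudorapidity formula eta = 0.5 * ln((p + pz) / (p - pz)):

import tensorflow as tf

pz = tf.constant([0.5, -1.2, 3.0])
pt = tf.constant([1.0, 0.7, 0.4])            # transverse momentum
p = tf.sqrt(pt**2 + pz**2)
eta_atanh = tf.atanh(pz / p)
eta_log = 0.5 * tf.math.log((p + pz) / (p - pz))
print(tf.reduce_max(tf.abs(eta_atanh - eta_log)).numpy())  # ~0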
def cont_bern_mean(lam, l_lim=0.49, u_lim=0.51):
    # Continuous Bernoulli mean function in TensorFlow.
    # Just like the normalizing constant, it is computed in a numerically
    # stable way around 0.5 by switching to a Taylor expansion there.
    cut_lam = tf.where(
        tf.logical_or(tf.less(lam, l_lim), tf.greater(lam, u_lim)),
        lam, l_lim * tf.ones_like(lam))
    mu = (cut_lam / (2.0 * cut_lam - 1.0)
          + 1.0 / (2.0 * tf.atanh(1.0 - 2.0 * cut_lam)))
    taylor = 0.5 + (lam - 0.5) / 3.0 + 16.0 / 45.0 * tf.pow(lam - 0.5, 3)
    return tf.where(
        tf.logical_or(tf.less(lam, l_lim), tf.greater(lam, u_lim)), mu, taylor)
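A small eager-mode check (mine, assuming TF 2.x) that the Taylor branch meets the exact mean formula at the cut points, while avoiding the 0/0 at lam = 0.5:

import tensorflow as tf

lam = tf.constant([0.2, 0.4899, 0.49, 0.5, 0.51, 0.5101, 0.8])
exact = lam / (2.0 * lam - 1.0) + 1.0 / (2.0 * tf.atanh(1.0 - 2.0 * lam))
print(cont_bern_mean(lam).numpy())
print(exact.numpy())  # agrees to ~1e-6, except at 0.5 where the exact form is nan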
def h_log(c, x, input):
    # Logarithmic map of `input` at base point x on the Poincare ball of
    # curvature -c, using the Mobius addition (+):
    #   log_x(y) = 2 / (sqrt(c) * lambda_x) * atanh(sqrt(c) * ||(-x) (+) y||)
    #              * ((-x) (+) y) / ||(-x) (+) y||
    xpy = mobius_add(c, -x, input)
    xpy_norm = safe_norm(xpy, axis=-1, keepdims=True)
    # Note: xpy_norm may need clipping to stay below 1/sqrt(c) so that the
    # atanh argument remains inside (-1, 1).
    output = (2. / (np.sqrt(c) * h_lambda(c, x))
              * tf.atanh(np.sqrt(c) * xpy_norm) * xpy / xpy_norm)
    return output
def apply_harmonic_bias(channels, num_layers):
    """Offset network outputs to ensure harmonic distribution of initial alpha.

    The first num_layers-1 channels are the ones that will become the alpha
    channels for layers [1, N-1]. (There is no channel corresponding to the
    alpha of the back layer because it is always 1.0, i.e. fully opaque.)

    We adjust these first num_layers-1 channels so that instead of all layer
    alphas having an initial mean of 0.5, the Nth layer from the back has an
    initial mean of 1/N. This harmonic distribution allows each layer to
    contribute equal weight when the layers are composed.

    Args:
      channels: [..., N] Network output before final tanh activation.
      num_layers: How many layers we are predicting an MPI for.

    Returns:
      [..., N] Adjusted output.
    """
    # The range below begins at 2 because the back layer is not predicted, as
    # it's always fully opaque.
    alpha = 1.0 / tf.range(2, num_layers + 1, dtype=tf.float32)
    # Convert to desired offset before activation and scaling:
    shift = tf.atanh(2.0 * alpha - 1.0)
    # Remaining channels are left as is.
    no_shift = tf.zeros([tf.shape(channels)[-1] - (num_layers - 1)])
    shift = tf.concat([shift, no_shift], axis=-1)
    return channels + shift
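A quick verification sketch (not in the original source): with zero network outputs, tanh plus this bias yields the harmonic alphas 1/2, 1/3, ..., 1/N after rescaling to [0, 1]:

import tensorflow as tf

num_layers = 5
channels = tf.zeros([1, num_layers + 2])        # a few extra non-alpha channels
out = apply_harmonic_bias(channels, num_layers)
alphas = tf.tanh(out[0, :num_layers - 1]) * 0.5 + 0.5
print(alphas.numpy())  # [0.5, 0.3333, 0.25, 0.2]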
def _inverse(self, y):
    # 0.99999997 is the maximum value such that atanh(x) is valid for both
    # tf.float32 and tf.float64.
    y = tf.where(tf.less_equal(tf.abs(y), 1.),
                 tf.clip_by_value(y, -0.99999997, 0.99999997),
                 y)
    return tf.atanh(y)
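A short demonstration (my own) of why this constant is safe for both float dtypes:

import tensorflow as tf

for dtype in (tf.float32, tf.float64):
    edge = tf.constant(0.99999997, dtype=dtype)
    one = tf.ones([], dtype=dtype)
    print(dtype.name, tf.atanh(edge).numpy(), tf.atanh(one).numpy())
    # atanh(0.99999997) is finite in both dtypes; atanh(1.0) is inf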
def log_pis_for(self, actions):
    if self._squash:
        raw_actions = tf.atanh(actions)
        log_pis = self._distribution.log_prob(raw_actions)
        log_pis -= self._squash_correction(raw_actions)
        return log_pis
    # Without squashing, the actions are already the raw distribution samples.
    return self._distribution.log_prob(actions)
def h_matmul(c, M, x):
    # Mobius matrix-vector multiplication on the Poincare ball of curvature -c:
    #   M (x) x = (1/sqrt(c)) * tanh(||Mx|| / ||x|| * atanh(sqrt(c) * ||x||)) * Mx / ||Mx||
    Mx = tf.matmul(M, x)
    output = (1. / np.sqrt(c)
              * tf.tanh(safe_norm(Mx) / safe_norm(x)
                        * tf.atanh(np.sqrt(c) * safe_norm(x)))
              * Mx / safe_norm(Mx))
    return output
def _inverse(self, y):
    dtype = y.dtype
    y = tf.cast(y, tf.float32)
    y = tf.where(tf.less_equal(tf.abs(y), 1.),
                 tf.clip_by_value(y, -0.99999997, 0.99999997),
                 y)
    y = tf.atanh(y)
    return tf.cast(y, dtype)
def logpac(self, action):
    from stable_baselines.sac.policies import gaussian_likelihood, EPS
    act_mu = self.policy_tf.act_mu
    log_std = tf.log(self.policy_tf.std)
    # Potentially we need to clip atanh and pass gradient.
    log_u = gaussian_likelihood(
        tf.atanh(tf.clip_by_value(action, -0.99, 0.99)), act_mu, log_std)
    log_ac = log_u - tf.reduce_sum(tf.log(1 - action**2 + EPS), axis=1)
    return log_ac
def neglogp(self, x):
    if self.squash:
        return (0.5 * tf.reduce_sum(
                    tf.square((tf.atanh(x) - self.mean) / self.std), axis=-1)
                + 0.5 * np.log(2.0 * np.pi) * tf.cast(tf.shape(x)[-1], tf.float32)
                + tf.reduce_sum(self.logstd, axis=-1)
                + tf.reduce_sum(tf.log(1 - x**2 + 1e-6), axis=-1))
    else:
        return (0.5 * tf.reduce_sum(tf.square((x - self.mean) / self.std), axis=-1)
                + 0.5 * np.log(2.0 * np.pi) * tf.cast(tf.shape(x)[-1], tf.float32)
                + tf.reduce_sum(self.logstd, axis=-1))
def unsquash_action(mu, pi, log_std):
    """Unsquash the action from [-1, 1] back to (-inf, inf) and correct its log-prob."""
    _pi = tf.atanh(pi)
    log_pi = Policy.gaussian_likelihood(_pi, mu, log_std)
    sub = tf.reduce_sum(
        tf.math.log(Policy.clip_but_pass_gradient(1 - pi**2, l=0, h=1) + 1e-6),
        axis=1, keepdims=True)
    log_pi -= sub
    return log_pi
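Policy.clip_but_pass_gradient is assumed to be defined elsewhere; a common definition (e.g. the one in OpenAI Spinning Up's SAC) clips in the forward pass but lets the gradient through unchanged:

import tensorflow as tf

def clip_but_pass_gradient(x, l=-1., u=1.):
    # Forward: clip(x, l, u). Backward: identity gradient, via stop_gradient.
    clip_up = tf.cast(x > u, tf.float32)
    clip_low = tf.cast(x < l, tf.float32)
    return x + tf.stop_gradient((u - x) * clip_up + (l - x) * clip_low)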
def cont_bern_log_norm(lam, l_lim=0.49, u_lim=0.51):
    # Computes the log normalizing constant of a continuous Bernoulli
    # distribution in a numerically stable way: the exact expression is
    # returned for lam in (0, l_lim) U (u_lim, 1), and a Taylor approximation
    # in [l_lim, u_lim].
    # cut_lam below might appear useless, but it is important not to evaluate
    # log_norm near 0.5, as tf.where evaluates both branches regardless of the
    # value of the condition.
    cut_lam = tf.where(
        tf.logical_or(tf.less(lam, l_lim), tf.greater(lam, u_lim)),
        lam, l_lim * tf.ones_like(lam))
    log_norm = (tf.log(tf.abs(2.0 * tf.atanh(1 - 2.0 * cut_lam)))
                - tf.log(tf.abs(1 - 2.0 * cut_lam)))
    taylor = (tf.log(2.0) + 4.0 / 3.0 * tf.pow(lam - 0.5, 2)
              + 104.0 / 45.0 * tf.pow(lam - 0.5, 4))
    return tf.where(
        tf.logical_or(tf.less(lam, l_lim), tf.greater(lam, u_lim)),
        log_norm, taylor)
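A NumPy cross-check (mine) that this closed form really is the log normalizer of the density proportional to lam^x * (1 - lam)^(1 - x) on [0, 1]:

import numpy as np

lam = 0.3
x = np.linspace(0.0, 1.0, 200001)
dx = x[1] - x[0]
integral = np.sum(lam**x * (1 - lam)**(1 - x)) * dx       # ~ 1 / C(lam)
exact = (np.log(np.abs(2 * np.arctanh(1 - 2 * lam)))
         - np.log(np.abs(1 - 2 * lam)))
print(-np.log(integral), exact)                            # both ~0.7506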
def tf_my_mob_mat_distance(mat_x, mat_y):
    # Input shape: [features, nodes].
    mat = tf_my_mob_mat_addition(-mat_x, mat_y)
    mat_norm = tf.norm(mat, axis=2)
    mat_norm = tf.clip_by_value(mat_norm, clip_value_min=1e-8,
                                clip_value_max=clip_value)
    res = 2. * tf.atanh(mat_norm)
    return res
def log_prob(self, value, **kwargs):
    if self.squash:
        # From the SAC paper: https://arxiv.org/pdf/1801.01290.pdf
        u = tf.atanh(value)
        correction = tf.reduce_sum(tf.log(1 - value ** 2 + EPSILON), axis=1)
        log_prob = super().log_prob(u, **kwargs) - correction
    else:
        log_prob = super().log_prob(value, **kwargs)
    return tf.reduce_sum(log_prob, axis=-1)
def AdaIN_adv_tanh(content, epsilon=1e-5):
    meanC, varC = tf.nn.moments(content, [1, 2], keep_dims=True)
    bs = settings.config["BATCH_SIZE"]
    content_shape = content.shape.as_list()
    new_shape = [bs, 1, 1, content_shape[3]]
    with tf.variable_scope("scale"):
        sigmaS = tf.get_variable("sigma_S", shape=new_shape,
                                 initializer=tf.zeros_initializer())
        meanS = tf.get_variable("mean_S", shape=new_shape,
                                initializer=tf.zeros_initializer())
    sigmaC = tf.sqrt(tf.add(varC, epsilon))
    # Bound the learned statistics to within a factor p of the content stats.
    p = tf.sqrt(1.5)

    def get_mid_range(l, r):
        _mid = (l + r) / 2.0
        _range = (r - l) / 2.0
        return _mid, _range

    sign = tf.sign(meanC)
    abs_meanC = tf.abs(meanC)
    _sigma_mid, _sigma_range = get_mid_range(sigmaC / p, sigmaC * p)
    _mean_mid, _mean_range = get_mid_range(abs_meanC / p, abs_meanC * p)
    sigmaSp = _sigma_range * tf.nn.tanh(sigmaS) + _sigma_mid
    meanSp = sign * (_mean_range * tf.nn.tanh(meanS) + _mean_mid)
    ops_bound = []
    # Initialize the variables via atanh so that, before training,
    # sigmaSp == sigmaC and meanSp == meanC.
    ops_asgn = [
        tf.assign(sigmaS, tf.atanh((sigmaC - _sigma_mid) / (_sigma_range + 1e-4))),
        tf.assign(meanS, tf.atanh((abs_meanC - _mean_mid) / (_mean_range + 1e-4))),
    ]
    return ((content - meanC) * sigmaSp / sigmaC + meanSp,
            ops_asgn, ops_bound, sigmaSp, meanSp, meanS, sigmaS)
def _build_baseline_policy_and_kl(self, target_dist, obs_input, action_input):
    EPS = 1e-6
    self.behavior_policy = Actor(self.action_dim, self.max_action,
                                 hidden_dim=self.hidden_dim)
    _, behavior_action_logp, behavior_dist = self.behavior_policy([obs_input])
    before_squashed_action = tf.atanh(
        tf.clip_by_value(action_input, -1 + EPS, 1 - EPS))
    log_likelihood = behavior_dist.log_prob(before_squashed_action)
    log_likelihood -= tf.reduce_sum(tf.log(1 - action_input ** 2 + EPS), axis=1)
    behavior_loss = -tf.reduce_mean(log_likelihood)
    behavior_optimizer = tf.train.AdamOptimizer(self.learning_rate)
    behavior_train_op = behavior_optimizer.minimize(
        behavior_loss, var_list=self.behavior_policy.trainable_variables)
    self.sess.run(tf.variables_initializer(behavior_optimizer.variables()))
    return (target_dist.kl_divergence(behavior_dist)[:, None],
            behavior_train_op, behavior_loss)
def _define_ops(self):
    super()._define_ops()
    # Loss to be optimized by attacker.
    self.loss: tf.Tensor = None
    # A single step of the attack. Will be run in order.
    self.step: List[tf.Tensor] = None
    # The output perturbed image.
    self.output: tf.Tensor = None

    # Carlini-Wagner change of variables: optimize w, with the adversarial
    # image parameterized as 0.5 * (tanh(w) + 1) so it always stays in [0, 1].
    w = tf.Variable(tf.zeros(self.batch_shape), name="w")
    Xadv = 0.5 * (tf.tanh(w) + 1)
    logits = self.model.logits(Xadv)
    # Initialize w from the clean image; the factor 1.9 < 2 keeps the atanh
    # argument strictly inside (-1, 1).
    self.init_inputs.append(tf.assign(w, tf.atanh(1.9 * (self.X_var - 0.5))))
    # L2 distortion term.
    term1 = tf.reduce_sum(tf.square(Xadv - self.X_var), [1, 2, 3])
    if self.target is None:
        # Untargeted: push the true class below the best other class.
        others_score = tf.reduce_max((1 - self.Yi) * logits, axis=1)
        target_score = tf.reduce_sum(self.Yi * logits, axis=1)
        term2 = tf.maximum(target_score - others_score, -self.k)
    else:
        # Targeted: push the target class above all other classes.
        target_onehot = tf.one_hot(
            np.repeat(self.target, self.batch_size), self.model.num_classes)
        others_score = tf.reduce_max((1 - target_onehot) * logits, axis=1)
        target_score = tf.reduce_sum(target_onehot * logits, axis=1)
        term2 = tf.maximum(others_score - target_score, -self.k)
    self.loss = term1 + self.c * term2
    grad = tf.gradients(ys=self.loss, xs=w)[0]
    w_updated = tf.assign(w, w - self.lr * grad)
    self.step = [w_updated,
                 tf.assign(self.lr, self.lr * self.learning_rate_decay)]
    self.output = tf.clip_by_value(Xadv, 0, 1)
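A standalone sketch (made-up shapes, assuming TF 2.x eager) of the Carlini-Wagner round trip above, and of why the 1.9 < 2 factor matters:

import tensorflow as tf

x = tf.random.uniform([4, 8, 8, 3])           # clean images in [0, 1]
w0 = tf.atanh(1.9 * (x - 0.5))                # finite even for pixels at exactly 0 or 1
x0 = 0.5 * (tf.tanh(w0) + 1)                  # = 0.95 * x + 0.025
print(tf.reduce_max(tf.abs(x0 - x)).numpy())  # <= 0.025: the price of the 1.9 margin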
def tf_my_prod_mat_log_map_zero(M, c):
    # Logarithmic map at the origin of the Poincare ball of curvature -c.
    sqrt_c = tf.sqrt(c)
    M = M + EPS
    M = tf.clip_by_norm(M, clip_norm=clip_value, axes=0)
    m_norm = tf.norm(M, axis=0)
    atanh_norm = tf.atanh(
        tf.clip_by_value(m_norm * sqrt_c, clip_value_min=-0.9, clip_value_max=0.9))
    M_cof = atanh_norm / m_norm / sqrt_c
    res = M * M_cof
    return res
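A self-contained round-trip check (my sketch, with an arbitrary curvature) pairing this log map with the matching exponential map at the origin, exp_0(v) = tanh(sqrt(c) * ||v||) * v / (sqrt(c) * ||v||):

import tensorflow as tf

c = 1.0
sqrt_c = tf.sqrt(c)
v = tf.constant([[0.3], [0.2], [-0.1]])              # tangent vector at the origin
v_norm = tf.norm(v, axis=0)

# exp map at 0, then log map at 0, should recover v.
y = tf.tanh(sqrt_c * v_norm) * v / (sqrt_c * v_norm)
y_norm = tf.norm(y, axis=0)
v_rec = tf.atanh(sqrt_c * y_norm) * y / (sqrt_c * y_norm)
print(tf.reduce_max(tf.abs(v - v_rec)).numpy())      # ~0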
def _create_q_update(self):
    """Create a minimization operation for Q-function update."""
    opponent_actions, opponent_actions_log_pis = self.opponent_policy.actions_for(
        observations=self._next_observations_ph,
        reuse=tf.AUTO_REUSE, with_log_pis=True)
    assert_shape(opponent_actions, [None, self._opponent_action_dim])

    prior = self._get_opponent_prior(self._next_observations_ph)
    raw_actions = tf.atanh(opponent_actions)
    prior_log_pis = prior.dist.log_prob(raw_actions)
    prior_log_pis = prior_log_pis - squash_correction(raw_actions)

    actions, actions_log_pis = self.policy.actions_for(
        observations=self._next_observations_ph,
        reuse=tf.AUTO_REUSE, with_log_pis=True,
        opponent_actions=opponent_actions)

    with tf.variable_scope('target_joint_q_agent_{}'.format(self._agent_id),
                           reuse=tf.AUTO_REUSE):
        q_value_targets = self.target_joint_qf.output_for(
            observations=self._next_observations_ph,
            actions=actions,
            opponent_actions=opponent_actions)
        q_value_targets = (q_value_targets
                           - self._annealing_pl * actions_log_pis
                           - opponent_actions_log_pis
                           + prior_log_pis)
        assert_shape(q_value_targets, [None])

    self._q_values = self.joint_qf.output_for(
        self._observations_ph, self._actions_pl, self._opponent_actions_pl,
        reuse=True)
    assert_shape(self._q_values, [None])

    ys = tf.stop_gradient(
        self._reward_scale * self._rewards_pl
        + (1 - self._terminals_pl) * self._discount * q_value_targets)
    assert_shape(ys, [None])

    bellman_residual = 0.5 * tf.reduce_mean((ys - self._q_values)**2)

    with tf.variable_scope('target_joint_qf_opt_agent_{}'.format(self._agent_id),
                           reuse=tf.AUTO_REUSE):
        if self._train_qf:
            td_train_op = tf.train.AdamOptimizer(self._qf_lr).minimize(
                loss=bellman_residual,
                var_list=self.joint_qf.get_params_internal())
            self._training_ops.append(td_train_op)

    self._bellman_residual = bellman_residual
def __init__(self, grid_height, grid_width, target_control_points, input_shape,
             bounded=True, **kwargs):
    '''
    tps_localizer generates the source control points in the input images.

    Input
    --------
    grid_height -- The y dimension of the target_control_points
    grid_width -- The x dimension of the target_control_points
    target_control_points -- [x, y] of shape (N, 2)
    input_shape -- The image 2D size of shape (H, W)
    bounded -- Whether the grid extent is bounded to [-1, 1] or not
    '''
    super(tps_localizer, self).__init__(**kwargs)
    assert tf.shape(target_control_points)[0] == grid_width * grid_height
    self.output_dim = tf.shape(target_control_points)[0]
    self.layer1 = tf.keras.layers.Conv2D(filters=10, kernel_size=5,
                                         input_shape=input_shape)
    self.layer2 = tf.keras.layers.MaxPool2D(pool_size=2)
    self.layer3 = tf.keras.layers.ReLU()
    self.layer4 = tf.keras.layers.Conv2D(filters=20, kernel_size=5)
    self.layer5 = tf.keras.layers.SpatialDropout2D(rate=0.5)
    self.layer6 = tf.keras.layers.MaxPool2D(pool_size=2)
    self.layer7 = tf.keras.layers.ReLU()
    self.layer8 = tf.keras.layers.Flatten()
    self.layer9 = tf.keras.layers.Dense(units=50, activation='relu')
    self.layer10 = tf.keras.layers.Dropout(rate=0.5)
    if bounded:
        # With a tanh output, initialize the bias at atanh(t) so that the layer
        # initially outputs exactly the target control points.
        self.layer11 = tf.keras.layers.Dense(
            units=self.output_dim,
            activation='tanh',
            kernel_initializer=tf.keras.initializers.Zeros(),
            bias_initializer=tf.keras.initializers.Constant(
                tf.atanh(target_control_points)))
    else:
        self.layer11 = tf.keras.layers.Dense(
            units=self.output_dim,
            activation='linear',
            kernel_initializer=tf.keras.initializers.Zeros(),
            bias_initializer=tf.keras.initializers.Constant(
                target_control_points))
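The bounded branch relies on tanh(atanh(t)) = t: with zero kernel weights, the layer initially outputs the target control points exactly. A tiny check (my own):

import tensorflow as tf

t = tf.constant([-0.5, 0.0, 0.25, 0.9])
print(tf.tanh(tf.atanh(t)).numpy())  # [-0.5, 0.0, 0.25, 0.9]: zero weights plus
                                     # this bias reproduce the targets at init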
def _inverse(self, y):
    return tf.atanh(y)
def adv_train_arctan_net(input_images, clip_norm=1.5):
    # Note: despite the "arctan" in the names, the transform used throughout
    # is atanh (the inverse of tanh).
    with tf.variable_scope('adv_encoder') as scope:
        width = 32
        height = 32
        batch_size = 128
        input_images = input_images / 255
        # Map images from [0, 1] into atanh space; the 0.999999 factor keeps
        # the atanh argument strictly inside (-1, 1).
        arctan_images = tf.atanh(((input_images * 2) - 1) * 0.999999)
        mean, var = tf.nn.moments(arctan_images,
                                  axes=tuple(range(1, len(input_images.shape))),
                                  keep_dims=True)
        # (Note: this normalizes by the variance rather than the standard deviation.)
        normed_input_images = (arctan_images - mean) / var

        # Convolutional layer 1
        conv1 = tf.layers.conv2d(inputs=normed_input_images, filters=64,
                                 kernel_size=(5, 5),
                                 activation=tf.nn.leaky_relu,
                                 padding='SAME', name='adv_conv1')
        # Maxpool layer 1
        maxpool1 = tf.layers.max_pooling2d(conv1, (3, 3), (2, 2), 'SAME')
        # Convolutional layer 2
        conv2 = tf.layers.conv2d(inputs=maxpool1, filters=128,
                                 kernel_size=(5, 5),
                                 activation=tf.nn.leaky_relu,
                                 padding='SAME', name='adv_conv2')
        # Maxpool layer 2
        maxpool2 = tf.layers.max_pooling2d(conv2, (3, 3), (2, 2), 'SAME')
        deconv1 = tf.layers.conv2d_transpose(maxpool2, 64, (5, 5), (2, 2), 'SAME',
                                             activation=tf.nn.leaky_relu,
                                             name='adv_deconv1')
        adv_mask = tf.layers.conv2d_transpose(deconv1, 3, (5, 5), (2, 2), 'SAME',
                                              name='adv_deconv2')

        # Add the perturbation in atanh space, map back through tanh, then
        # bound the perturbation norm and keep pixels in [0, 1].
        arctan_adv_images = adv_mask + normed_input_images
        unscaled_adv_images = tf.tanh(arctan_adv_images)
        unscaled_diff = unscaled_adv_images - input_images
        scaled_dif = tf.clip_by_norm(unscaled_diff, clip_norm)
        adv_images = tf.clip_by_value(scaled_dif + input_images, 0, 1)
        output_images = tf.reshape(adv_images,
                                   (batch_size, height, width, 3)) * 255.0
        dif = adv_images - input_images

        # Display the training images in the visualizer.
        tf.summary.image('adv_images', output_images)

        # Reconstruction L2 loss
        mean_square_error = tf.reduce_mean(tf.square(dif),
                                           axis=list(range(1, len(dif.shape))))
        loss = tf.reduce_mean(mean_square_error, name='dis_loss')

    return loss, output_images