def _log_prob(self, sample, x_hat):
    # x_hat holds the probability of each site being +1, not the
    # probability of the given sample itself.
    mask = (sample + 1) / 2
    log_prob = (
        tfm.log(x_hat + self.epsilon) * tf.cast(mask, tf.float32) +  # type: ignore
        tfm.log(1 - x_hat + self.epsilon) * tf.cast(1 - mask, tf.float32))  # type: ignore
    log_prob = tfm.reduce_sum(log_prob, [1, 2, 3])
    return log_prob

def log_ggd(x, p, mu, alpha):
    cp = tf.cast(
        log(p) - ((p + 1) / p) * tf.cast(log(2.0), settings.float_type) -
        lgamma(1 / p), settings.float_type)
    res = tf.cast(
        cp - log(alpha) - pow(abs(x - mu), p) / (2 * pow(alpha, p)),
        settings.float_type)
    return res

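# Sanity sketch (my addition, not from the original source): with p = 2 the
# generalized Gaussian above reduces to a Normal with standard deviation
# alpha, so log_ggd should agree with tensorflow_probability. Assumes that
# log / lgamma / pow / abs alias the corresponding tf.math ops and that
# settings.float_type is tf.float64.
def _check_log_ggd_gaussian_case():
    import tensorflow as tf
    import tensorflow_probability as tfp
    f64 = lambda v: tf.constant(v, tf.float64)
    x = tf.constant([-1.0, 0.5, 2.0], dtype=tf.float64)
    ref = tfp.distributions.Normal(loc=f64(0.0), scale=f64(1.0)).log_prob(x)
    tf.debugging.assert_near(
        log_ggd(x, p=f64(2.0), mu=f64(0.0), alpha=f64(1.0)), ref)
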
def bce(pred, label):  # Binary Cross Entropy
    if label == 0:
        pred = pred - config.EPSILON
        loss = math.log(1 - pred)
    else:
        pred = pred + config.EPSILON
        loss = math.log(pred)
    return -loss

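# A minimal usage sketch (my addition). Assumes config.EPSILON is a small
# positive constant (e.g. 1e-7) and `math` is Python's standard math module.
def _demo_bce():
    print(bce(0.9, 1))  # -log(0.9 + eps)      ~= 0.105
    print(bce(0.9, 0))  # -log(1 - 0.9 + eps)  ~= 2.303
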
def actor_predictive_clustering_loss(y_true, y_pred, cluster_assignment_probs,
                                     y_type='categorical',
                                     name='actor_pred_clus_L'):
    """
    Compute prediction clustering loss between predicted output and true
    output, with probability weights from cluster assignments.

    Inputs have shape (batch_size, T, num_classes) for y_pred,
    (batch_size, num_classes) for y_true and (batch_size, num_cluster) for
    cluster_assignment_probs. There are a variety of different settings, all
    weighted sample-wise by assignment probability:

    - Binary: Computes Binary Cross Entropy. Class/Event occurrence is matched
      with a dimension. y_true with entries in [0,1], and y_pred with values
      in (0,1).
    - Categorical: Computes Cross Entropy Loss. Class assigned by highest-value
      dimension. y_true is a one-hot encoding, and y_pred is a probability
      vector.
    - Continuous: Computes L2 loss. Similar to the Binary case, but class
      attributes are continuous. y_true and y_pred both with real-valued
      entries.

    Returns:
        Loss value between sample true y and predicted y based on y_type,
        of shape (batch_size).
    """
    y_true_temp_ = tf.repeat(tf.expand_dims(y_true, axis=1),
                             repeats=y_pred.shape[1],
                             axis=1,
                             name='true_y_time')

    if y_type == 'binary':
        # Compute Binary Cross Entropy weighted by cluster assignment probabilities.
        sample_loss = multiply(
            tf.reduce_sum(y_true_temp_ * log(y_pred) +
                          (1 - y_true_temp_) * log(1 - y_pred), axis=-1),
            cluster_assignment_probs)
        batch_loss = -tf.reduce_mean(sample_loss, name=name)
        return batch_loss

    elif y_type == 'categorical':
        # Compute Categorical Cross Entropy weighted by cluster assignment probabilities.
        sample_loss = multiply(
            tf.reduce_sum(y_true_temp_ * log(y_pred), axis=-1),
            cluster_assignment_probs)
        batch_loss = -tf.reduce_mean(sample_loss, name=name)
        return batch_loss

    elif y_type == 'continuous':
        # Compute L2 loss weighted by cluster assignment probabilities.
        sample_loss = multiply(
            tf.reduce_sum((y_true_temp_ - y_pred)**2, axis=-1),
            cluster_assignment_probs)
        batch_loss = tf.reduce_mean(sample_loss, name=name)
        return batch_loss

def PST(I, LPF, Phase_strength, Warp_strength, Threshold_min, Threshold_max):
    # Invert Threshold_min to simplify the optimization process, so all
    # variables can be clipped between 0 and 1.
    LPF = ops.convert_to_tensor_v2(LPF)
    Phase_strength = ops.convert_to_tensor_v2(Phase_strength)
    Warp_strength = ops.convert_to_tensor_v2(Warp_strength)
    I = ops.convert_to_tensor_v2(I)
    Threshold_min = ops.convert_to_tensor_v2(Threshold_min)
    Threshold_max = ops.convert_to_tensor_v2(Threshold_max)
    Threshold_min = -Threshold_min

    L = 0.5
    x = tf.linspace(-L, L, I.shape[0])
    y = tf.linspace(-L, L, I.shape[1])
    [X1, Y1] = tf.meshgrid(x, y)
    X = tf.transpose(X1)
    Y = tf.transpose(Y1)
    [THETA, RHO] = cart2pol(X, Y)

    # Apply the localization kernel to the original image to reduce noise
    Image_orig_f = sig.fft2d(tf.dtypes.cast(I, tf.complex64))
    tmp6 = (LPF**2.0) / tfm.log(2.0)
    tmp5 = tfm.sqrt(tmp6)
    tmp4 = tfm.divide(RHO, tmp5)
    tmp3 = -tfm.pow(tmp4, 2)
    tmp2 = tfm.exp(tmp3)
    expo = fftshift(tmp2)
    Image_orig_filtered = tfm.real(
        sig.ifft2d(
            tfm.multiply(tf.dtypes.cast(Image_orig_f, tf.complex64),
                         tf.dtypes.cast(expo, tf.complex64))))

    # Construct the PST kernel
    tp1 = tfm.multiply(RHO, Warp_strength)
    PST_Kernel_1 = tfm.multiply(
        tp1, tfm.atan(tfm.multiply(RHO, Warp_strength))
    ) - 0.5 * tfm.log(1.0 + tfm.pow(tf.multiply(RHO, Warp_strength), 2.0))
    PST_Kernel = PST_Kernel_1 / tfm.reduce_max(PST_Kernel_1) * Phase_strength

    # Apply the PST kernel
    temp = tfm.multiply(
        fftshift(
            tfm.exp(
                tfm.multiply(tf.dtypes.complex(0.0, -1.0),
                             tf.dtypes.cast(PST_Kernel, tf.dtypes.complex64)))),
        sig.fft2d(tf.dtypes.cast(Image_orig_filtered, tf.dtypes.complex64)))
    Image_orig_filtered_PST = sig.ifft2d(temp)

    # Calculate the phase of the transformed image
    PHI_features = tfm.angle(Image_orig_filtered_PST)
    out = PHI_features
    out = (out / tfm.reduce_max(out)) * 3
    return out

def loss(pmodel, pposition, pnext_position, prandom_position, last, result,
         to_move, training):
    y_position = pmodel(pposition, training=training)
    y_next_position = pmodel(pnext_position, training=training)
    y_random_position = pmodel(prandom_position, training=training)
    last = tf.cast(last, dtype=bool)
    # For terminal positions, use the game result instead of the model's
    # evaluation of the next position (batch size is hard-coded to 32).
    y_next_position = tf.where(tf.reshape(last, [32, 1]),
                               tf.reshape(result, [32, 1]),
                               tf.reshape(y_next_position, [32, 1]))
    return -(tf.reduce_mean(
        log(sigmoid(
            tf.cast(tf.reshape(tf.math.pow(-1, to_move), [32, 1]),
                    dtype=tf.float32) *
            (y_random_position - y_next_position))) +
        kappa * log(sigmoid(-y_position + y_next_position)) +
        kappa * log(sigmoid(y_position - y_next_position))))

def adaptive_wing_loss(labels, output):
    alpha = 2.1
    omega = 14
    epsilon = 1
    theta = 0.5
    with tf.name_scope('adaptive_wing_loss'):
        x = output - labels
        theta_over_epsilon_tensor = tf.fill(tf.shape(labels), theta / epsilon)
        A = omega * (1 / (1 + pow(theta_over_epsilon_tensor, alpha - labels))) * \
            (alpha - labels) * pow(theta_over_epsilon_tensor, alpha - labels - 1) * \
            (1 / epsilon)
        C = theta * A - omega * log(1 + pow(theta_over_epsilon_tensor,
                                            alpha - labels))
        absolute_x = abs(x)
        losses = tf.where(greater(theta, absolute_x),
                          omega * log(1 + pow(absolute_x / epsilon,
                                              alpha - labels)),
                          A * absolute_x - C)
        loss = reduce_mean(reduce_sum(losses, axis=[1, 2]), axis=0)
        return loss

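# A minimal usage sketch (my addition). Shapes follow the reduction axes
# above: (batch, height, width, channels) heatmaps, summed over H and W and
# averaged over the batch, giving one loss value per channel. Assumes
# pow / log / abs / greater / reduce_mean / reduce_sum alias the tf.math ops.
def _demo_adaptive_wing_loss():
    import tensorflow as tf
    labels = tf.random.uniform((4, 64, 64, 1))
    output = tf.random.uniform((4, 64, 64, 1))
    return adaptive_wing_loss(labels, output)  # shape (1,)
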
def demo_gmm_log_pdf(z):
    norm1 = tfd.MultivariateNormalFullCovariance(
        loc=demo_gmm_mu1, covariance_matrix=demo_gmm_sigma1)
    norm2 = tfd.MultivariateNormalFullCovariance(
        loc=demo_gmm_mu2, covariance_matrix=demo_gmm_sigma2)
    return math.log(0.5 * norm1.prob(z) + 0.5 * norm2.prob(z))

def call(self, x):
    """Calculates the probability of sample x

    Args:
        x (int32): Value of input lattice
    """

    def SplDense(x):
        """Used instead of a regular Keras Dense layer so that a common
        kernel and bias can be shared."""
        return tfk.activations.sigmoid(tf.matmul(x, self.kernel) + self.bias)

    x = self.flatten(x)
    x0 = tf.gather(x, tf.range(self.D - 1), axis=1)
    mask = tf.broadcast_to(self.mask, [x.shape[0]] + self.mask.shape)
    x1 = tf.broadcast_to(tf.expand_dims(x0, 1),
                         (x.shape[0], self.D - 1, self.D - 1))
    # For each sample in the batch, x1 holds the masked concatenation of all
    # elements that each site depends on.
    x1 = mask * x1
    h = SplDense(x1)
    y = self.output_layer(h)
    x2 = tf.gather(x, tf.range(1, self.D), axis=1)
    p = -tfm.log(0.5 * (1 - x2) + (x2 * y))
    return tf.reduce_mean(p, axis=1)

def inference(self, features, outputs, is_train):
    """Inference for targeting ybar"""
    if not is_train:
        return super().inference(features, outputs, is_train)
    sens_attr = tf.cast(tf.squeeze(features['sensitive'], -1), dtype=tf.int32)
    out_int = tf.cast(tf.squeeze(outputs, -1), dtype=tf.int32)
    # likelihood for y=1
    lik1 = tf.squeeze(tf.nn.sigmoid(self._logits(features)), axis=-1)
    # likelihood for y=0
    lik0 = 1 - lik1
    lik = tf.stack((lik0, lik1), axis=-1)
    debias = self._debiasing_parameters()
    # `debias` has the shape (y, s, y'). We stack output and sensitive
    # attribute to (batch_size, 2) and use that as indices into `debias`.
    # Shape of debias_per_example: (batch_size, output_dim, 2).
    debias_per_example = tft.gather_nd(debias,
                                       tf.stack((out_int, sens_attr), axis=-1))
    weighted_lik = debias_per_example * lik
    log_cond_prob = tfm.log(tf.reduce_sum(weighted_lik, axis=-1))
    regr_loss = -tf.reduce_mean(log_cond_prob)
    l2_loss = self._l2_loss()
    return ({
        'loss': regr_loss + l2_loss,
        'regr_loss': regr_loss,
        'l2_loss': l2_loss
    }, self._trainable_variables())

def call(self, ensemble_logits, logits):
    '''
    ensemble_logits are the outputs from our ensemble (batch x ensembles x classes)
    logits are the predicted outputs from our model (batch x classes)
    '''
    if self.temp is None:
        self.temp = self.init_temp

    # Convert values to appropriate type
    logits = tf.cast(logits, dtype=tf.float64)
    ensemble_logits = tf.cast(ensemble_logits, dtype=tf.float64)

    # Calculate probabilities by softmax over classes, adjusted for temperature
    ensemble_probs = softmax(ensemble_logits / self.temp, axis=2)
    PN_probs = softmax(logits / self.temp, axis=1)

    # Calculate mean teacher prediction
    ensemble_probs_mean = reduce_mean(ensemble_probs, axis=1)

    # Calculate cost (cross-entropy), scaled by temp**2 as usual in
    # distillation so gradient magnitudes stay comparable across temperatures
    cost = reduce_mean(-ensemble_probs_mean * log(PN_probs) * self.temp**2)
    return cost

def _build_loo_loss(self, weights, means, chol_covars, inducing_inputs,
                    kernel_chol, features, train_outputs):
    """Construct the leave-one-out loss.

    Args:
        weights: (num_components,)
        means: shape: (num_components, num_latent, num_inducing)
        chol_covars: shape: (num_components, num_latent, num_inducing[, num_inducing])
        inducing_inputs: (num_latent, num_inducing, input_dim)
        kernel_chol: (num_latent, num_inducing, num_inducing)
        features: dict with key 'input' of shape (batch_size, input_dim)
        train_outputs: (batch_size, num_latent)
    Returns:
        LOO loss
    """
    kern_prods, kern_sums = self._build_interim_vals(kernel_chol,
                                                     inducing_inputs,
                                                     features['input'])
    latent_samples = self._build_samples(kern_prods, kern_sums, means,
                                         chol_covars)
    # Output of log_cond_prob: (num_components, num_samples, batch_size, num_latent)
    # Shape of loss_by_component: (num_components, batch_size, num_latent)
    loss_by_component = tf.reduce_mean(
        input_tensor=1.0 / (tf.exp(
            self.lik.log_cond_prob(train_outputs, latent_samples)) + 1e-7),
        axis=1)
    loss = tf.reduce_sum(input_tensor=weights[:, tf.newaxis, tf.newaxis] *
                         loss_by_component,
                         axis=0)
    return tf.reduce_sum(input_tensor=tfm.log(loss))

def one_hot(target_action, action_prob, reward):
    action_dim = action_prob.shape[0]
    action_onehot = tf.one_hot(target_action, action_dim)
    action_mask = tf.cast(action_onehot, tf.bool)
    picked_prob = tf.boolean_mask(action_prob, action_mask)
    action_loss = tf.reduce_sum(-math.log(picked_prob) * reward)
    return action_loss

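# A minimal REINFORCE-style usage sketch (my addition; assumes `math` above
# is tf.math and action_prob is the policy output for a single state):
def _demo_one_hot_loss():
    import tensorflow as tf
    action_prob = tf.constant([0.1, 0.7, 0.2])
    return one_hot(target_action=1, action_prob=action_prob, reward=1.5)
    # == -log(0.7) * 1.5 ~= 0.535
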
def scce(cell, label):  # Sparse Categorical Cross Entropy
    pred = cell[int(label) + 1]
    pred += config.EPSILON
    loss = math.log(pred)
    return -loss

def figure_eight_log_pdf(z):
    comp1 = tfd.MultivariateNormalDiag(loc=figure_eight_mu1,
                                       scale_diag=figure_eight_scale)
    comp2 = tfd.MultivariateNormalDiag(loc=figure_eight_mu2,
                                       scale_diag=figure_eight_scale)
    return math.log((1 - figure_eight_pi) * comp1.prob(z) +
                    figure_eight_pi * comp2.prob(z))

def energy_4_log_pdf(z):
    z2 = z[:, 1]
    x1 = -0.5 * ((z2 - w1(z)) / 0.4)**2
    x2 = -0.5 * ((z2 - w1(z) + w3(z)) / 0.35)**2
    # log(exp(x1) + exp(x2)) via the log-sum-exp trick for numerical stability
    a = math.maximum(x1, x2)
    exp1 = math.exp(x1 - a)
    exp2 = math.exp(x2 - a)
    return a + math.log(exp1 + exp2)

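# Why the max is subtracted above (my note, not from the source): for very
# negative x1 and x2, exp(x1) and exp(x2) underflow to 0 and the naive
# log(exp(x1) + exp(x2)) becomes log(0) = -inf, while the shifted version
# stays finite:
def _demo_logsumexp():
    import tensorflow as tf
    x1 = tf.constant(-1000.0)
    x2 = tf.constant(-1001.0)
    a = tf.math.maximum(x1, x2)
    stable = a + tf.math.log(tf.math.exp(x1 - a) + tf.math.exp(x2 - a))
    naive = tf.math.log(tf.math.exp(x1) + tf.math.exp(x2))
    return stable, naive  # ~ -999.687, -inf
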
def g_loss(d_scores_fake):
    """
    `d_scores_fake` is the output of the discriminator model applied to a
    batch of fake data.

    NOTE: we always define objectives as if we were minimizing them
    (remember: to maximize, negate and minimize).
    """
    if TASK == 1:
        return tm.reduce_mean(tm.log(1 - d_scores_fake))
    elif TASK == 2:
        return -tm.reduce_mean(tm.log(d_scores_fake))
    elif TASK == 3 or TASK == 4:
        # tries to maximize the score so that it becomes positive
        # (similar to the discriminator score)
        return -tm.reduce_mean(d_scores_fake)
    elif TASK == 5:
        # the INN has no generator loss
        return None

def circle_log_pdf(z):
    z1, z2 = z[:, 0], z[:, 1]
    norm = (z1**2 + z2**2)**0.5
    exp1 = math.exp(-0.2 * ((z1 - 2) / 0.8)**2)
    exp2 = math.exp(-0.2 * ((z1 + 2) / 0.8)**2)
    u = 0.5 * ((norm - 4) / 0.4)**2 - math.log(exp1 + exp2)
    return -u

def predictive_clustering_loss(y_true, y_pred, y_type='categorical',
                               name='pred_clus_L'):
    """Compute prediction clustering loss between predicted output and true output.

    Inputs have shape (batch_size, T, num_classes) for y_pred and
    (batch_size, num_classes) for y_true. There are a variety of different
    settings:

    - Binary: Computes Binary Cross Entropy. Class/Event occurrence is matched
      with a dimension. y_true with entries in [0,1], and y_pred with values
      in (0,1).
    - Categorical: Computes Cross Entropy Loss. Class assigned by highest-value
      dimension. y_true is a one-hot encoding, and y_pred is a probability
      vector.
    - Continuous: Computes L2 loss. Similar to the Binary case, but class
      attributes are continuous. y_true and y_pred both with real-valued
      entries.

    Returns:
        Loss value between sample true y and predicted y based on y_type,
        of shape (batch_size).
    """
    y_true_temp_ = tf.repeat(tf.expand_dims(y_true, axis=1),
                             repeats=y_pred.shape[1],
                             axis=1,
                             name='true_y_time')

    if y_type == 'binary':
        # Compute Binary Cross Entropy. y_pred is the output of a sigmoid to
        # avoid taking the log of infinity.
        batch_loss = -tf.reduce_mean(tf.reduce_sum(
            y_true_temp_ * log(y_pred) +
            (1 - y_true_temp_) * log(1 - y_pred), axis=-1),
                                     name=name)
    elif y_type == 'categorical':
        # Compute Categorical Cross Entropy. y_pred is the output of a softmax
        # modelling a probability vector.
        batch_loss = -tf.reduce_mean(
            tf.reduce_sum(y_true_temp_ * log(y_pred), axis=-1), name=name)
    elif y_type == 'continuous':
        # Compute L2 loss. y_pred is not given a final output activation.
        batch_loss = tf.reduce_mean(tf.reduce_sum((y_true_temp_ - y_pred)**2,
                                                  axis=-1),
                                    name=name)
    else:
        raise Exception(
            """y_type not well-defined. Only possible values are {'binary',
            'categorical', 'continuous'}""")
    return batch_loss

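# A minimal usage sketch (my addition; shapes follow the docstring, and `log`
# is assumed to be tf.math.log):
def _demo_predictive_clustering_loss():
    import tensorflow as tf
    logits = tf.random.normal((8, 5, 3))      # (batch, T, num_classes)
    y_pred = tf.nn.softmax(logits, axis=-1)
    y_true = tf.one_hot(tf.random.uniform((8,), maxval=3, dtype=tf.int32), 3)
    return predictive_clustering_loss(y_true, y_pred, y_type='categorical')
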
def PSNR(y_true, y_pred):
    """
    @param y_true: target value
    @param y_pred: predicted value
    """
    max_pixel = 0.5
    y_pred = K.clip(y_pred, -0.5, 0.5)
    # PSNR is defined in decibels, i.e. with a base-10 logarithm.
    return 10.0 * log((max_pixel**2) /
                      (K.mean(K.square(y_pred - y_true)))) / log(10.0)

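# Sanity sketch (my addition): with the base-10 logarithm this should agree
# with tf.image.psnr at max_val=0.5 for a single image. Assumes `log` above
# is the natural-log backend op (e.g. K.log).
def _demo_psnr():
    import tensorflow as tf
    a = tf.random.uniform((1, 32, 32, 1), -0.5, 0.5)
    b = tf.random.uniform((1, 32, 32, 1), -0.5, 0.5)
    return PSNR(a, b), tf.image.psnr(a, b, max_val=0.5)[0]
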
def energy_1_log_pdf(z):
    z1, z2 = z[:, 0], z[:, 1]
    norm = (z1**2 + z2**2)**0.5
    exp1 = math.exp(-0.5 * ((z1 - 2) / 0.6)**2)
    exp2 = math.exp(-0.5 * ((z1 + 2) / 0.6)**2)
    u = 0.5 * ((norm - 2) / 0.4)**2 - math.log(exp1 + exp2)
    return -u

def logit(x, nan_replace=0):
    # Inverse of the sigmoid; NaNs (from x outside [0, 1]) are replaced
    # with nan_replace.
    x = tfm.log(x / (1 - x))
    x = tf.where(tf.math.is_nan(x), nan_replace * tf.ones([], x.dtype), x)
    return x

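# A small check of my own: logit inverts the sigmoid on (0, 1).
def _demo_logit():
    import tensorflow as tf
    p = tf.constant([0.1, 0.5, 0.9])
    tf.debugging.assert_near(tf.sigmoid(logit(p)), p)
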
def eight_schools_log_pdf(z, centered=EIGHT_SCHOOL_CENTERED):
    prior_mu = tfd.Normal(loc=0, scale=5)
    prior_tau = tfd.HalfCauchy(loc=0, scale=5)
    mu, log_tau = z[:, -2], z[:, -1]
    # Adapt the sizes of mu and tau.
    mu = tf.transpose(eight_schools_replicate * mu)
    log_tau = tf.transpose(eight_schools_replicate * log_tau)
    if centered:
        # shapes: thetas=(8,N), mu=(N,), tau=(N,)
        thetas = z[:, 0:eight_schools_K]
        likelihood = tfd.Normal(loc=thetas,
                                scale=eight_schools_sigma[0:eight_schools_K])
        prior_theta = tfd.Normal(loc=mu, scale=math.exp(log_tau))
        log_det_jac = math.log(math.exp(
            log_tau))  # kept as log(exp()) for mathematical clarity
        return likelihood.log_prob(
            eight_schools_y[0:eight_schools_K]) + prior_theta.log_prob(
                thetas) + prior_mu.log_prob(mu) + prior_tau.log_prob(
                    math.exp(log_tau)) + log_det_jac
    else:
        # shapes: thetas=(8,N), mu=(N,), tau=(N,)
        thetas_tilde = z[:, 0:eight_schools_K]
        zeros = tf.zeros(mu.shape)
        ones = tf.ones(log_tau.shape)
        thetas = mu + thetas_tilde * math.exp(log_tau)
        likelihood = tfd.Normal(loc=thetas,
                                scale=eight_schools_sigma[0:eight_schools_K])
        prior_theta = tfd.Normal(loc=zeros, scale=ones)
        log_det_jac = math.log(math.exp(
            log_tau))  # kept as log(exp()) for mathematical clarity
        return likelihood.log_prob(
            eight_schools_y[0:eight_schools_K]) + prior_theta.log_prob(
                thetas_tilde) + prior_mu.log_prob(mu) + prior_tau.log_prob(
                    math.exp(log_tau)) + log_det_jac

def log_prob(self, sample, beta):
    x_hat = self.call(sample, beta)
    log_prob = self._log_prob(sample, x_hat)
    if self.z2:
        # Enforce Z2 symmetry: average the probabilities of the sample and
        # its spin-flipped counterpart.
        sample_inv = -sample
        x_hat_inv = self.call(sample_inv, beta)
        log_prob_inv = self._log_prob(sample_inv, x_hat_inv)
        log_prob = tfm.reduce_logsumexp(tf.stack([log_prob, log_prob_inv]),
                                        axis=0)
        log_prob -= tfm.log(2.)
    return tf.cast(log_prob, tf.float32)

def d_loss(d_scores_fake, d_scores_real):
    """
    `d_scores_fake` is the output of the discriminator model applied to a
    batch of fake data.
    `d_scores_real` is the output of the discriminator model applied to a
    batch of real data.

    NOTE: we always define objectives as if we were minimizing them
    (remember: to maximize, negate and minimize).
    """
    if TASK == 1:
        return -tm.reduce_mean(
            tm.log(d_scores_real) + tm.log(1 - d_scores_fake))
    elif TASK == 2:
        return -tm.reduce_mean(
            tm.log(d_scores_real) + tm.log(1 - d_scores_fake))
    elif TASK == 3 or TASK == 4:
        # Maximize the critic score:
        # push the mean of real scores to large positive values,
        # and the mean of fake scores to large negative values.
        return -(tm.reduce_mean(d_scores_real) -
                 tm.reduce_mean(d_scores_fake))
    elif TASK == 5:
        return -(tm.reduce_mean(d_scores_real) -
                 tm.reduce_mean(d_scores_fake))

def compute_gaussian_log_pdf(z, z_mean, z_log_var):
    """
    Compute the log probability density of a Gaussian distribution.
    Based on the implementation of Locatello et al.
    (https://github.com/google-research/disentanglement_lib)

    :param z: the sampled values
    :param z_mean: the mean of the Gaussian
    :param z_log_var: the log variance of the Gaussian
    :return: the log probability density
    """
    log2pi = tfm.log(2. * tf.constant(pi))
    return -0.5 * (tfm.square(z - z_mean) * tfm.exp(-z_log_var) + z_log_var +
                   log2pi)

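# Sanity sketch (my addition): for a scalar z this should match the
# log-density of tfp.distributions.Normal with scale exp(0.5 * z_log_var).
def _check_gaussian_log_pdf():
    import tensorflow as tf
    import tensorflow_probability as tfp
    z = tf.constant(0.3)
    z_mean = tf.constant(0.0)
    z_log_var = tf.constant(0.0)
    ref = tfp.distributions.Normal(z_mean,
                                   tf.exp(0.5 * z_log_var)).log_prob(z)
    tf.debugging.assert_near(compute_gaussian_log_pdf(z, z_mean, z_log_var),
                             ref)
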
def _binary_crossentropy(y_true: Tensor, y_pred: Tensor,
                         from_logits: bool = False,
                         label_smoothing: float = 0) -> Tensor:
    """(losses.binary_crossentropy())

    :param y_true: shape = (N, X), float32. In general, X = 1
    :param y_pred: shape = (N, X), float32
    :return: shape = (N,), float32"""
    y_pred = tf.clip_by_value(y_pred, 2e-7, 1 - 2e-7)  # guard against inf
    y_true = y_true if (label_smoothing == 0) else \
        (y_true * (1 - label_smoothing) + label_smoothing / 2)
    if from_logits:
        return tf.reduce_mean(-y_true * tfm.log_sigmoid(y_pred) +
                              -(1 - y_true) * tfm.log_sigmoid(-y_pred),
                              axis=-1)
    else:
        return tf.reduce_mean(-y_true * tfm.log(y_pred) +
                              -(1 - y_true) * tfm.log(1 - y_pred),
                              axis=-1)

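# Sanity sketch (my addition): with label_smoothing=0 and probabilities as
# input, this should match tf.keras.losses.binary_crossentropy up to the
# clipping epsilon.
def _check_binary_crossentropy():
    import tensorflow as tf
    y_true = tf.constant([[1.0], [0.0]])
    y_pred = tf.constant([[0.8], [0.3]])
    ref = tf.keras.losses.binary_crossentropy(y_true, y_pred)
    tf.debugging.assert_near(_binary_crossentropy(y_true, y_pred), ref,
                             atol=1e-5)
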
def _genes(self, fbar, kbar, k_fbar, wbar, w_0bar, σ2_m, Δ):
    m_pred = self.predict_m(kbar, k_fbar, wbar, fbar, w_0bar, Δ)
    sq_diff = tfm.square(self.data.m_obs - tf.transpose(
        tf.gather(tf.transpose(m_pred), self.data.common_indices)))
    variance = tf.reshape(σ2_m, (-1, 1))
    if self.preprocessing_variance:
        variance = logit(variance) + self.data.σ2_m_pre  # add PUMA variance
    log_lik = -0.5 * tfm.log(2 * PI * variance) - 0.5 * sq_diff / variance
    log_lik = tf.reduce_sum(log_lik)
    return log_lik

def _build_log_marginal_likelihood(train_outputs, chol, alpha):
    # Contract the batch dimension; quad_form has shape (num_latent,)
    quad_form = tf.matmul(train_outputs, alpha, transpose_a=True)
    log_trace = util.log_cholesky_det(chol)
    # log_marginal_likelihood (num_latent,)
    # log p(y) = -0.5 * (y^T K^{-1} y + log|K| + n * log(2*pi))
    num_train = tf.to_float(tf.shape(chol)[-1])
    log_marginal_likelihood = -0.5 * (quad_form + log_trace +
                                      num_train * tfm.log(2 * np.pi))
    # Sum over num_latent at the end to get a scalar; this corresponds to
    # multiplying the marginal likelihoods of all the latent functions.
    return tf.reduce_sum(log_marginal_likelihood)

def cluster_probability_entropy_loss(y_prob, name='clus_entr_L'):
    """
    Compute entropy loss on cluster probability assignments.
    Inputs have shape (batch_size, num_classes), and y_prob is a probability
    vector.

    :param y_prob: a probability vector
    :return: entropy loss, defined as -sum pi*log(pi), with its minimum
        attained by one-hot probability vectors.
    """
    batch_loss = tf.reduce_mean(-tf.reduce_sum(y_prob * log(y_prob), axis=-1),
                                name=name)
    return batch_loss

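# A tiny check of my own: the entropy loss vanishes at (near) one-hot vectors
# and peaks at the uniform distribution. A small epsilon keeps the log finite.
def _demo_entropy_loss():
    import tensorflow as tf
    near_one_hot = tf.constant([[1.0 - 1e-7, 1e-7]])
    uniform = tf.constant([[0.5, 0.5]])
    return (cluster_probability_entropy_loss(near_one_hot),  # ~ 0
            cluster_probability_entropy_loss(uniform))       # ~ log(2) ~= 0.693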