def _cdf(self, x):
  """Evaluates the CDF of the Uniform distribution(s) at `x`.

  The result is 0 where `x < a`, 1 where `x >= b`, and
  `(x - a) / range` everywhere else.

  Args:
    x: values at which to evaluate the CDF.

  Returns:
    The CDF values selected element-wise as described above.
  """
  x_full = x * array_ops.ones(self.batch_shape())
  # `x + a + b` is only used as a template for the shape of the constants.
  all_zeros = array_ops.zeros_like(x + self.a + self.b, dtype=self.dtype)
  all_ones = array_ops.ones_like(x + self.a + self.b, dtype=self.dtype)
  interior = (x_full - self.a) / self.range()
  below_or_interior = math_ops.select(x < self.a, all_zeros, interior)
  return math_ops.select(x >= self.b, all_ones, below_or_interior)
def _log_cdf(self, y):
  """Log-CDF at `y`, built from `self.distribution.log_cdf` plus cutoffs.

  The promise being implemented:
    cdf(y) := P[Y <= y]
            = 1, if y >= upper_cutoff,
            = 0, if y < lower_cutoff,
            = P[X <= y], otherwise.

  Args:
    y: values at which to evaluate the log-CDF.

  Returns:
    The log-CDF values, with the cutoff regions overridden.
  """
  lo = self._lower_cutoff
  hi = self._upper_cutoff

  # Mass is only at integers, not in between, so
  # P[Y <= y] = P[Y <= floor(y)].
  floored = math_ops.floor(y)
  log_cdf = self.distribution.log_cdf(floored)

  # A single distribution may be evaluated on a number of samples (or
  # something like that), so expand `floored` to the shape of the result.
  floored += array_ops.zeros_like(log_cdf)

  # Override the values beyond the cutoffs: log(0) below, log(1) above.
  if lo is not None:
    log_of_zero = -np.inf * array_ops.ones_like(log_cdf)
    log_cdf = math_ops.select(floored < lo, log_of_zero, log_cdf)
  if hi is not None:
    log_cdf = math_ops.select(
        floored >= hi, array_ops.zeros_like(log_cdf), log_cdf)

  return log_cdf
def pdf(self, x, name="pdf"):
  """Evaluates the PDF of the Uniform distribution(s) at `x`.

  Args:
    x: tensor of dtype `dtype`, must be broadcastable with `a` and `b`.
    name: The name to give this op.

  Returns:
    pdf: tensor of dtype `dtype`, the pdf values of `x`. If `x` is `nan`, will
        return `nan`.

  Raises:
    TypeError: if the dtype of `x` differs from `self.dtype`.
  """
  with ops.name_scope(self.name), ops.op_scope([self.a, self.b, x], name):
    x = ops.convert_to_tensor(x, name="x")
    if x.dtype != self.dtype:
      raise TypeError("Input x dtype does not match dtype: %s vs. %s" %
                      (x.dtype, self.dtype))
    x_full = x * self._ones()
    is_nan = math_ops.is_nan(x_full)
    out_of_support = math_ops.logical_or(x_full < self.a, x_full > self.b)
    # Zero outside [a, b], constant 1 / range inside.
    density = math_ops.select(
        out_of_support,
        array_ops.zeros_like(x_full),
        (1.0 / self.range()) * array_ops.ones_like(x_full))
    # NaN inputs are passed straight through to the output.
    return math_ops.select(is_nan, x_full, density)
def cdf(self, x, name="cdf"):
  """Evaluates the CDF of the Uniform distribution(s) at `x`.

  Args:
    x: tensor of dtype `dtype`, must be broadcastable with `a` and `b`.
    name: The name to give this op.

  Returns:
    cdf: tensor of dtype `dtype`, the CDFs of `x`. If `x` is `nan`, will
        return `nan`.

  Raises:
    TypeError: if the dtype of `x` differs from `self.dtype`.
  """
  with ops.op_scope([self.a, self.b, x], self.name), ops.name_scope(name):
    x = ops.convert_to_tensor(x, name="x")
    if x.dtype != self.dtype:
      raise TypeError(
          "Input x dtype does not match dtype: %s vs. %s" %
          (x.dtype, self.dtype))
    x_full = x * self._ones()
    below = x_full < self.a
    zeros = array_ops.zeros_like(x_full)
    # NOTE(review): `self.range` is used as an attribute (not called) here;
    # confirm it is a property in this class.
    at_or_above_a = math_ops.select(
        x_full >= self.b,
        array_ops.ones_like(x_full),
        (x_full - self.a) / self.range)
    return math_ops.select(below, zeros, at_or_above_a)
def _variance(self):
  """Variance of each batch member.

  Returns `sigma^2 * df / (df - 2)` where `df > 2` and `inf` elsewhere.
  Entries with `df <= 1` become `NaN` when `self.allow_nan_stats` is set;
  otherwise an assertion requiring `df > 1` is attached to the result.

  Returns:
    The variance for every batch member.
  """
  np_dtype = self.dtype.as_numpy_dtype()
  finite_var = (
      self._ones() * math_ops.square(self.sigma) * self.df / (self.df - 2))

  # When 1 < df <= 2, variance is infinite.
  inf = np.array(np.inf, dtype=np_dtype)
  df_gt_2 = math_ops.greater(
      self.df, array_ops.fill(self.batch_shape(), 2.0))
  var_or_inf = math_ops.select(
      df_gt_2,
      finite_var,
      array_ops.fill(self.batch_shape(), inf, name="inf"))

  if not self.allow_nan_stats:
    df_gt_1_check = check_ops.assert_less(
        array_ops.ones((), dtype=self.dtype),
        self.df,
        message="variance not defined for components of df <= 1")
    return control_flow_ops.with_dependencies([df_gt_1_check], var_or_inf)

  nan = np.array(np.nan, dtype=np_dtype)
  return math_ops.select(
      math_ops.greater(self.df, self._ones()),
      var_or_inf,
      array_ops.fill(self.batch_shape(), nan, name="nan"))
def _safe_div(numerator, denominator, name="value"):
  """Divides element-wise, yielding 0 wherever the denominator is not positive.

  The denominator is additionally replaced by 1 wherever it equals zero
  before the division itself is built. That extra guard is what keeps NaNs
  from creeping into the gradient computation when the loss is zero.

  Args:
    numerator: An arbitrary `Output`.
    denominator: An `Output` whose shape matches `numerator` and whose values
      are assumed to be non-negative.
    name: An optional name for the returned op.

  Returns:
    The element-wise value of the numerator divided by the denominator.
  """
  denominator_is_positive = math_ops.greater(denominator, 0)
  # Never divide by an actual zero, even in the branch that gets discarded.
  guarded_denominator = math_ops.select(
      math_ops.equal(denominator, 0),
      array_ops.ones_like(denominator),
      denominator)
  quotient = math_ops.div(numerator, guarded_denominator)
  return math_ops.select(
      denominator_is_positive,
      quotient,
      array_ops.zeros_like(numerator),
      name=name)
def cdf(self, x, name="cdf"):
  """Computes the CDF of `x` under these Uniform distribution(s).

  Args:
    x: tensor of dtype `dtype`, must be broadcastable with `a` and `b`.
    name: The name to give this op.

  Returns:
    cdf: tensor of dtype `dtype`, the CDFs of `x`. If `x` is `nan`, will
        return `nan`.

  Raises:
    TypeError: if the dtype of `x` differs from `self.dtype`.
  """
  with ops.op_scope([self.a, self.b, x], self.name):
    with ops.name_scope(name):
      x = ops.convert_to_tensor(x, name="x")
      if x.dtype != self.dtype:
        message = "Input x dtype does not match dtype: %s vs. %s" % (
            x.dtype, self.dtype)
        raise TypeError(message)

      x_full = x * self._ones()
      is_below_a = x_full < self.a
      zero_part = array_ops.zeros_like(x_full)
      # 1 at or beyond `b`, linear ramp otherwise.
      # NOTE(review): `self.range` is read as an attribute here, not called;
      # confirm it is a property in this class.
      upper_part = math_ops.select(
          x_full >= self.b,
          array_ops.ones_like(x_full),
          (x_full - self.a) / self.range)
      return math_ops.select(is_below_a, zero_part, upper_part)
def copy_through():
  """Selects between (zero output, old state) and (output, new state).

  Entries where `time >= sequence_length` receive `zero_output` and the
  previous `state`; all other entries receive the calculated `output` and
  `new_state`. All of these names come from the enclosing scope.

  Returns:
    A `(selected_output, selected_state)` tuple.
  """
  # A broadcasting select decides per entry which value is kept.
  is_finished = (time >= sequence_length)
  selected_output = math_ops.select(is_finished, zero_output, output)
  selected_state = math_ops.select(is_finished, state, new_state)
  return (selected_output, selected_state)
def _copy_some_through(new_output, new_state):
  """Selects between (zero output, old state) and the freshly computed pair.

  Entries where `time >= sequence_length` receive `zero_output` and the
  previous `state` (both taken from the enclosing scope); all other entries
  receive `new_output` and `new_state`.

  Args:
    new_output: the calculated output.
    new_state: the calculated state.

  Returns:
    A `(selected_output, selected_state)` tuple.
  """
  # A broadcasting select decides per entry which value is kept.
  is_finished = (time >= sequence_length)
  selected_output = math_ops.select(is_finished, zero_output, new_output)
  selected_state = math_ops.select(is_finished, state, new_state)
  return (selected_output, selected_state)
def pixelwise_weighted_binary_crossentropy(y_true, y_pred):
  '''
  Pixel-wise weighted binary cross-entropy.

  Computes the binary cross-entropy between the prediction (y_pred) and the
  segmentation target, multiplies it by a pixel-wise weight map and averages
  over the last axis. Target and weight map are both unstacked from y_true.

  On the Gauss ssh server, tf.log must be written as tf.math.log

  Args:
    y_true: tensor whose last axis has exactly two entries, [seg, weight].
    y_pred: predicted probabilities; they are clipped to
      [epsilon, 1 - epsilon] before being converted to logits.

  Returns:
    The weighted cross-entropy, averaged over the last axis.

  Raises:
    Whatever tf.unstack raises when the last axis of y_true does not hold
    exactly two entries.
  '''
  # The weights are passed as part of the y_true tensor: [seg, weight].
  # This used to sit inside a bare `except: pass`, which hid the real shape
  # error and then failed below with a NameError on `seg` / `weight`. Let the
  # original error surface instead.
  seg, weight = tf.unstack(y_true, 2, axis=-1)
  seg = tf.expand_dims(seg, -1)
  weight = tf.expand_dims(weight, -1)

  # Clip away exact 0 / 1 so the logit conversion below stays finite.
  epsilon = tf.convert_to_tensor(K.epsilon(), y_pred.dtype.base_dtype)
  y_pred = tf.clip_by_value(y_pred, epsilon, 1. - epsilon)
  y_pred = tf.math.log(y_pred / (1 - y_pred))

  # Numerically stable sigmoid cross-entropy on logits:
  #   max(x, 0) - x * z + log(1 + exp(-|x|))
  zeros = array_ops.zeros_like(y_pred, dtype=y_pred.dtype)
  cond = (y_pred >= zeros)
  relu_logits = math_ops.select(cond, y_pred, zeros)
  neg_abs_logits = math_ops.select(cond, -y_pred, y_pred)
  entropy = math_ops.add(relu_logits - y_pred * seg,
                         math_ops.log1p(math_ops.exp(neg_abs_logits)),
                         name=None)

  # This is essentially the only part that is different from the Keras code:
  return K.mean(math_ops.multiply(weight, entropy), axis=-1)
def _survival_function(self, y):
  """Survival function at `y`, from `self.distribution` plus cutoffs.

  The promise being implemented:
    survival_function(y) := P[Y > y]
                          = 0, if y >= upper_cutoff,
                          = 1, if y < lower_cutoff,
                          = P[X > y], otherwise.

  Args:
    y: values at which to evaluate the survival function.

  Returns:
    The survival function values, with the cutoff regions overridden.
  """
  lo = self._lower_cutoff
  hi = self._upper_cutoff

  # Mass is only at integers, not in between, so the wrapped distribution is
  # evaluated at ceil(y).
  ceiled = math_ops.ceil(y)

  # P[X > ceil(y)], used when lower_cutoff < X < upper_cutoff.
  survival = self.distribution.survival_function(ceiled)

  # A single distribution may be evaluated on a number of samples (or
  # something like that), so expand `ceiled` to the shape of the result.
  ceiled += array_ops.zeros_like(survival)

  # Override the values beyond the cutoffs.
  if lo is not None:
    survival = math_ops.select(
        ceiled < lo, array_ops.ones_like(survival), survival)
  if hi is not None:
    survival = math_ops.select(
        ceiled >= hi, array_ops.zeros_like(survival), survival)

  return survival
def _log_cdf(self, y):
  """Computes log P[Y <= y] using `self.distribution.log_cdf` and the cutoffs.

  The promise being implemented:
    cdf(y) := P[Y <= y]
            = 1, if y >= upper_cutoff,
            = 0, if y < lower_cutoff,
            = P[X <= y], otherwise.

  Args:
    y: values at which to evaluate the log-CDF.

  Returns:
    The log-CDF values, with the cutoff regions overridden.
  """
  lower = self._lower_cutoff
  upper = self._upper_cutoff

  # P[Y <= y] = P[Y <= floor(y)] because mass is only at integers, not in
  # between.
  y_floor = math_ops.floor(y)
  result = self.distribution.log_cdf(y_floor)

  # Broadcast: this may be a single distribution evaluated on a number of
  # samples, or something like that.
  y_floor += array_ops.zeros_like(result)

  # Re-define the values at the cutoffs.
  if lower is not None:
    minus_inf = -np.inf * array_ops.ones_like(result)
    result = math_ops.select(y_floor < lower, minus_inf, result)

  if upper is not None:
    result = math_ops.select(
        y_floor >= upper, array_ops.zeros_like(result), result)

  return result
def _survival_function(self, y):
  """Computes P[Y > y] using `self.distribution` and the cutoffs.

  The promise being implemented:
    survival_function(y) := P[Y > y]
                          = 0, if y >= upper_cutoff,
                          = 1, if y < lower_cutoff,
                          = P[X > y], otherwise.

  Args:
    y: values at which to evaluate the survival function.

  Returns:
    The survival function values, with the cutoff regions overridden.
  """
  lower = self._lower_cutoff
  upper = self._upper_cutoff

  # Mass is only at integers, not in between, so evaluate at ceil(y).
  y_ceil = math_ops.ceil(y)

  # P[X > ceil(y)], used when lower_cutoff < X < upper_cutoff.
  result = self.distribution.survival_function(y_ceil)

  # Broadcast: this may be a single distribution evaluated on a number of
  # samples, or something like that.
  y_ceil += array_ops.zeros_like(result)

  # Re-define the values at the cutoffs.
  if lower is not None:
    result = math_ops.select(
        y_ceil < lower, array_ops.ones_like(result), result)

  if upper is not None:
    result = math_ops.select(
        y_ceil >= upper, array_ops.zeros_like(result), result)

  return result
def _copy_some_through(new_output, new_state):
  """Selects per entry between old and freshly computed output/state values.

  Entries where `time >= sequence_length` receive `zero_output` and the
  matching element of the previous `state` (both from the enclosing scope);
  all other entries receive `new_output` and the matching element of
  `new_state`.

  Args:
    new_output: the calculated output.
    new_state: iterable of calculated state components, paired element by
      element with `state`.

  Returns:
    A list holding the selected output followed by one selected tensor per
    state component.
  """
  # A broadcasting select decides per entry which value is kept.
  is_finished = time >= sequence_length
  selected = [math_ops.select(is_finished, zero_output, new_output)]
  for previous_part, updated_part in zip(state, new_state):
    selected.append(
        math_ops.select(is_finished, previous_part, updated_part))
  return selected
def _prob(self, x):
  """Evaluates the density of the Uniform distribution(s) at `x`.

  The density is `1 / range` on `[a, b]` and 0 outside of it. NaN inputs are
  passed through to the output.

  Args:
    x: values at which to evaluate the density.

  Returns:
    The density values.
  """
  x_full = x * array_ops.ones(self.batch_shape())
  is_nan = math_ops.is_nan(x_full)
  out_of_support = math_ops.logical_or(x_full < self.a, x_full > self.b)
  density = math_ops.select(
      out_of_support,
      array_ops.zeros_like(x_full),
      (1. / self.range()) * array_ops.ones_like(x_full))
  return math_ops.select(is_nan, x_full, density)
def pick_vector(cond, true_vector, false_vector, name="pick_vector"):
  """Picks one of two row `Tensor`s, possibly of different length.

  Value `Tensor`s should have exactly one dimension.

  If the value of `cond` is statically known (a python Boolean or a
  `tf.constant`), the chosen argument is returned immediately as passed in:
  the vectors are neither converted nor dtype-checked.

  Args:
    cond: `Tensor`. Must have `dtype=tf.bool` and be scalar.
    true_vector: `Tensor` of one dimension. Returned when cond is `True`.
    false_vector: `Tensor` of one dimension. Returned when cond is `False`.
    name: `String`. The name to give this op.

  Example:

  ```python
  pick_vector(tf.less(0, 5), tf.range(10, 12), tf.range(15, 18))
  # result is tensor: [10, 11].
  pick_vector(tf.less(5, 0), tf.range(10, 12), tf.range(15, 18))
  # result is tensor: [15, 16, 17].
  ```

  Returns:
    true_or_false_vector: `Tensor`.

  Raises:
    TypeError: if `cond.dtype != tf.bool`
    TypeError: if `cond` is not a constant and
      `true_vector.dtype != false_vector.dtype`
  """
  with ops.op_scope((cond, true_vector, false_vector), name):
    cond = ops.convert_to_tensor(cond, name="cond")
    if cond.dtype != dtypes.bool:
      raise TypeError("%s.dtype=%s which is not %s" %
                      (cond.name, cond.dtype, dtypes.bool))

    # Static shortcut: no selection ops are needed.
    static_cond = tensor_util.constant_value(cond)
    if static_cond is not None:
      if static_cond:
        return true_vector
      return false_vector

    true_vector = ops.convert_to_tensor(true_vector, name="true_vector")
    false_vector = ops.convert_to_tensor(false_vector, name="false_vector")
    if true_vector.dtype != false_vector.dtype:
      raise TypeError(
          "%s.dtype=%s does not match %s.dtype=%s"
          % (true_vector.name, true_vector.dtype,
             false_vector.name, false_vector.dtype))

    # Concatenate both vectors and slice out the wanted one: [0, n) when
    # `cond` holds, otherwise everything from n to the end (size -1).
    n = array_ops.shape(true_vector)[0]
    both = array_ops.concat(0, (true_vector, false_vector))
    begin = [math_ops.select(cond, 0, n)]
    size = [math_ops.select(cond, n, -1)]
    return array_ops.slice(both, begin, size)
def pick_vector(cond, true_vector, false_vector, name="pick_vector"):
  """Chooses between two row `Output`s that may differ in length.

  Value `Output`s should have exactly one dimension.

  If the value of `cond` is statically known (a python Boolean or a
  `tf.constant`), the chosen argument is returned immediately as passed in:
  the vectors are neither converted nor dtype-checked.

  Args:
    cond: `Output`. Must have `dtype=tf.bool` and be scalar.
    true_vector: `Output` of one dimension. Returned when cond is `True`.
    false_vector: `Output` of one dimension. Returned when cond is `False`.
    name: `String`. The name to give this op.

  Example:

  ```python
  pick_vector(tf.less(0, 5), tf.range(10, 12), tf.range(15, 18))
  # result is tensor: [10, 11].
  pick_vector(tf.less(5, 0), tf.range(10, 12), tf.range(15, 18))
  # result is tensor: [15, 16, 17].
  ```

  Returns:
    true_or_false_vector: `Output`.

  Raises:
    TypeError: if `cond.dtype != tf.bool`
    TypeError: if `cond` is not a constant and
      `true_vector.dtype != false_vector.dtype`
  """
  with ops.name_scope(name, values=(cond, true_vector, false_vector)):
    cond = ops.convert_to_tensor(cond, name="cond")
    if cond.dtype != dtypes.bool:
      raise TypeError("%s.dtype=%s which is not %s" %
                      (cond.name, cond.dtype, dtypes.bool))

    # Static shortcut: no selection ops are needed.
    known_cond = tensor_util.constant_value(cond)
    if known_cond is not None:
      return true_vector if known_cond else false_vector

    true_vector = ops.convert_to_tensor(true_vector, name="true_vector")
    false_vector = ops.convert_to_tensor(false_vector, name="false_vector")
    if true_vector.dtype != false_vector.dtype:
      details = (true_vector.name, true_vector.dtype,
                 false_vector.name, false_vector.dtype)
      raise TypeError("%s.dtype=%s does not match %s.dtype=%s" % details)

    # Slice the wanted vector out of the concatenation of both: [0, n) when
    # `cond` holds, otherwise everything from n to the end (size -1).
    n = array_ops.shape(true_vector)[0]
    stacked = array_ops.concat(0, (true_vector, false_vector))
    start = math_ops.select(cond, 0, n)
    length = math_ops.select(cond, n, -1)
    return array_ops.slice(stacked, [start], [length])
def _ndtr(x):
  """Core of ndtr, built from `erf` and `erfc`.

  With `w = x / sqrt(2)` and `z = |w|`, the result is half of:
    1 + erf(w)   if z < sqrt(2) / 2,
    2 - erfc(z)  if w > 0,
    erfc(z)      otherwise.

  Args:
    x: input tensor; its dtype is used for the internal constant.

  Returns:
    A tensor holding the values described above.
  """
  half_sqrt_2 = constant_op.constant(
      0.5 * math.sqrt(2.), dtype=x.dtype, name="half_sqrt_2")
  w = x * half_sqrt_2
  z = math_ops.abs(w)
  is_central = math_ops.less(z, half_sqrt_2)
  central_value = 1. + math_ops.erf(w)
  tail_value = math_ops.select(
      math_ops.greater(w, 0.),
      2. - math_ops.erfc(z),
      math_ops.erfc(z))
  return 0.5 * math_ops.select(is_central, central_value, tail_value)
def _ndtr(x):
  """Implements the ndtr core logic via `erf` / `erfc`.

  With `w = x / sqrt(2)` and `z = |w|`, the result is half of:
    1 + erf(w)   if z < sqrt(2) / 2,
    2 - erfc(z)  if w > 0,
    erfc(z)      otherwise.

  Args:
    x: input tensor; its dtype is used for the internal constant.

  Returns:
    A tensor holding the values described above.
  """
  half_sqrt_2 = constant_op.constant(
      0.5 * math.sqrt(2.), dtype=x.dtype, name="half_sqrt_2")
  scaled = x * half_sqrt_2
  magnitude = math_ops.abs(scaled)

  near_zero = math_ops.less(magnitude, half_sqrt_2)
  value_near_zero = 1. + math_ops.erf(scaled)

  is_positive = math_ops.greater(scaled, 0.)
  value_in_tails = math_ops.select(
      is_positive, 2. - math_ops.erfc(magnitude), math_ops.erfc(magnitude))

  doubled = math_ops.select(near_zero, value_near_zero, value_in_tails)
  return 0.5 * doubled
def my_weighted_cross_entropy_with_logits(logits, targets, pos_weight,
                                          name=None):
  """Computes a weighted sigmoid cross-entropy from logits.

  With `x = logits`, `z = targets`, `q = pos_weight` and
  `l = 1 + (q - 1) * z`, the loss is

    (1 - z) * x + l * log(1 + exp(-x))

  which is evaluated in the numerically stable, branch-free form

    (1 - z) * x + l * (log(1 + exp(-abs(x))) + max(-x, 0))

  Args:
    logits: tensor of logits.
    targets: tensor with the same shape as `logits`.
    pos_weight: coefficient applied to the positive targets.
    name: optional name for the operation.

  Returns:
    A tensor with the same shape as `logits` holding the element-wise loss.

  Raises:
    ValueError: if `logits` and `targets` do not have the same shape.
  """
  with ops.name_scope(name, "logistic_loss", [logits, targets]) as name:
    logits = ops.convert_to_tensor(logits, name="logits")
    targets = ops.convert_to_tensor(targets, name="targets")
    try:
      targets.get_shape().merge_with(logits.get_shape())
    except ValueError:
      raise ValueError(
          "logits and targets must have the same shape (%s vs %s)"
          % (logits.get_shape(), targets.get_shape()))

    log_weight = 1 + (pos_weight - 1) * targets

    # -abs(logits), expressed with a select.
    zeros = array_ops.zeros_like(logits, dtype=logits.dtype)
    cond = (logits >= zeros)
    neg_abs_logits = math_ops.select(cond, -logits, logits)

    # The first term must be (1 - z) * x. It used to be max(x, 0) - x * z,
    # which counted max(-x, 0) a second time on top of the weighted term
    # below and inflated the loss by |x| for every negative logit.
    first_part = (1 - targets) * logits
    second_part = log_weight * (
        math_ops.log(1 + math_ops.exp(neg_abs_logits)) +
        nn_ops.relu(-logits))
    return math_ops.add(first_part, second_part, name=name)
def variance(self, name="variance"):
  """Computes the variance of each batch member.

  Variance for inverse gamma is defined only for `alpha > 2`. With
  `self.allow_nan_stats` set, the remaining entries are `NaN`; otherwise an
  assertion requiring `alpha > 2` is attached to the result, so an exception
  is raised rather than returning `NaN`.

  Args:
    name: A name to give this op.

  Returns:
    The variance for every batch member, a `Tensor` with same `dtype` as self.
  """
  alpha = self._alpha
  beta = self._beta
  with ops.name_scope(self.name), ops.op_scope([alpha, beta], name):
    numerator = math_ops.square(self._beta)
    denominator = (
        math_ops.square(self._alpha - 1.0) * (self._alpha - 2.0))
    variance_value = numerator / denominator

    if not self.allow_nan_stats:
      two = constant_op.constant(2.0, dtype=self.dtype)
      alpha_check = check_ops.assert_less(
          two, alpha,
          message="variance not defined for components of alpha <= 2")
      return control_flow_ops.with_dependencies(
          [alpha_check], variance_value)

    is_defined = alpha > 2.0
    nan_fill = np.nan * self._ones()
    return math_ops.select(is_defined, variance_value, nan_fill)
def mean(self, name="mean"):
  """Computes the mean of each batch member.

  The mean of an inverse gamma distribution is `beta / (alpha - 1)`,
  when `alpha > 1`, and `NaN` otherwise. If `self.allow_nan_stats` is
  `False`, an assertion requiring `alpha > 1` is attached to the result, so
  an exception is raised rather than returning `NaN`.

  Args:
    name: A name to give this op.

  Returns:
    The mean for every batch member, a `Tensor` with same `dtype` as self.
  """
  alpha = self._alpha
  beta = self._beta
  with ops.name_scope(self.name), ops.op_scope([alpha, beta], name):
    mean_value = beta / (alpha - 1.0)

    if not self.allow_nan_stats:
      one = constant_op.constant(1.0, dtype=self.dtype)
      alpha_check = check_ops.assert_less(
          one, alpha,
          message="mean not defined for components of alpha <= 1")
      return control_flow_ops.with_dependencies([alpha_check], mean_value)

    is_defined = alpha > 1.0
    nan_fill = np.nan * self._ones()
    return math_ops.select(is_defined, mean_value, nan_fill)
def mode(self, name="mode"):
  """Computes the mode of the distribution.

  The mode of the Beta distribution is only defined when `a > 1` and
  `b > 1`; it is `(a - 1) / (a + b - 2)` there and NaN otherwise. If
  `self.allow_nan_stats` is `False`, assertions requiring `a > 1` and
  `b > 1` are attached to the result, so an exception is raised rather than
  returning `NaN`.

  Args:
    name: The name for this op.

  Returns:
    Mode of the Beta distribution.
  """
  scope_values = [self._a, self._b, self._a_b_sum]
  with ops.name_scope(self.name), ops.op_scope(scope_values, name):
    a = self._a
    b = self._b
    total = self._a_b_sum
    one = constant_op.constant(1, self.dtype)
    mode_value = (a - 1) / (total - 2)

    if not self.allow_nan_stats:
      checks = [check_ops.assert_less(one, a),
                check_ops.assert_less(one, b)]
      return control_flow_ops.with_dependencies(checks, mode_value)

    is_defined = math_ops.logical_and(
        math_ops.greater(a, 1), math_ops.greater(b, 1))
    nan_fill = (constant_op.constant(float("NaN"), dtype=self.dtype) *
                array_ops.ones_like(total, dtype=self.dtype))
    return math_ops.select(is_defined, mode_value, nan_fill)
def mean(self, name="mean"):
  """Mean of each batch member: `mu` where `df > 1`, `NaN` elsewhere.

  Args:
    name: A name to give this op.

  Returns:
    The element-wise selected mean.
  """
  with ops.name_scope(self.name), ops.op_scope([self._mu], name):
    is_defined = self._df > self._ones()
    mean_value = self._mu * self._ones()
    nan_fill = np.nan + self._zeros()
    return math_ops.select(is_defined, mean_value, nan_fill)
def _mode(self):
  """Mode of each batch member.

  Returns `(a - 1) / (a_b_sum - 2)`. Entries without both `a > 1` and
  `b > 1` become `NaN` when `self.allow_nan_stats` is set; otherwise
  assertions requiring `a > 1` and `b > 1` are attached to the result.

  Returns:
    The mode for every batch member.
  """
  mode_value = (self.a - 1.0) / (self.a_b_sum - 2.0)

  if not self.allow_nan_stats:
    a_check = check_ops.assert_less(
        array_ops.ones((), dtype=self.dtype),
        self.a,
        message="Mode not defined for components of a <= 1.")
    b_check = check_ops.assert_less(
        array_ops.ones((), dtype=self.dtype),
        self.b,
        message="Mode not defined for components of b <= 1.")
    return control_flow_ops.with_dependencies([a_check, b_check], mode_value)

  nan = np.array(np.nan, dtype=self.dtype.as_numpy_dtype())
  is_defined = math_ops.logical_and(
      math_ops.greater(self.a, 1.0), math_ops.greater(self.b, 1.0))
  nan_fill = array_ops.fill(self.batch_shape(), nan, name="nan")
  return math_ops.select(is_defined, mode_value, nan_fill)
def variance(self, name="variance"):
  """Returns the variance of each batch member.

  Variance for inverse gamma is defined only for `alpha > 2`. With
  `self.allow_nan_stats` set, the remaining entries are `NaN`; otherwise an
  assertion requiring `alpha > 2` is attached to the result, so an exception
  is raised rather than returning `NaN`.

  Args:
    name: A name to give this op.

  Returns:
    The variance for every batch member, a `Tensor` with same `dtype` as self.
  """
  alpha = self._alpha
  beta = self._beta
  with ops.name_scope(self.name):
    with ops.op_scope([alpha, beta], name):
      beta_squared = math_ops.square(self._beta)
      scale = math_ops.square(self._alpha - 1.0) * (self._alpha - 2.0)
      result = beta_squared / scale

      if self.allow_nan_stats:
        defined_mask = alpha > 2.0
        undefined_value = np.nan * self._ones()
        return math_ops.select(defined_mask, result, undefined_value)

      two = constant_op.constant(2.0, dtype=self.dtype)
      guard = check_ops.assert_less(
          two, alpha,
          message="variance not defined for components of alpha <= 2")
      return control_flow_ops.with_dependencies([guard], result)
def mean(self, name="mean"):
  """Returns the mean of each batch member.

  The mean of an inverse gamma distribution is `beta / (alpha - 1)`,
  when `alpha > 1`, and `NaN` otherwise. If `self.allow_nan_stats` is
  `False`, an assertion requiring `alpha > 1` is attached to the result, so
  an exception is raised rather than returning `NaN`.

  Args:
    name: A name to give this op.

  Returns:
    The mean for every batch member, a `Tensor` with same `dtype` as self.
  """
  alpha = self._alpha
  beta = self._beta
  with ops.name_scope(self.name):
    with ops.op_scope([alpha, beta], name):
      result = beta / (alpha - 1.0)

      if self.allow_nan_stats:
        defined_mask = alpha > 1.0
        undefined_value = np.nan * self._ones()
        return math_ops.select(defined_mask, result, undefined_value)

      one = constant_op.constant(1.0, dtype=self.dtype)
      guard = check_ops.assert_less(
          one, alpha,
          message="mean not defined for components of alpha <= 1")
      return control_flow_ops.with_dependencies([guard], result)
def mode(self, name="mode"):
  """Mode of each batch member.

  The mode of a gamma distribution is `(alpha - 1) / beta` when `alpha > 1`,
  and `NaN` otherwise. If `self.allow_nan_stats` is `False`, an assertion
  requiring `alpha > 1` is attached to the result, so an exception is raised
  rather than returning `NaN`.

  Args:
    name: A name to give this op.

  Returns:
    The mode for every batch member, a `Tensor` with same `dtype` as self.
  """
  alpha = self._alpha
  beta = self._beta
  with ops.name_scope(self.name):
    with ops.name_scope(name, values=[alpha, beta]):
      mode_if_defined = (alpha - 1.0) / beta
      if self.allow_nan_stats:
        # Strict inequality, so that this branch agrees with the documented
        # contract and with the `alpha <= 1` assertion below. It previously
        # used `>=` and returned a value at `alpha == 1`, where the other
        # branch raises.
        alpha_gt_1 = alpha > 1.0
        nan = np.nan * self._ones()
        return math_ops.select(alpha_gt_1, mode_if_defined, nan)
      else:
        one = constant_op.constant(1.0, dtype=self.dtype)
        return control_flow_ops.with_dependencies([
            check_ops.assert_less(
                one, alpha,
                message="mode not defined for components of alpha <= 1")
        ], mode_if_defined)
def gradient_clipping(grads_and_vars):
  """Internal function for adaptive clipping.

  Scales every gradient by a common factor derived from the global gradient
  norm and the adaptive maximum norm. The clipping settings are read from
  the enclosing scope.

  Args:
    grads_and_vars: iterable of `(gradient, variable)` pairs; a gradient may
      be `None` or an `ops.IndexedSlices`.

  Returns:
    A list of `(clipped_gradient, variable)` pairs in the input order.
    `None` gradients stay `None`.
  """
  grads, variables = zip(*grads_and_vars)
  norm = clip_ops.global_norm(grads)

  max_norm, log_mean = _adaptive_max_norm(
      norm, std_factor, decay, global_step, epsilon, name)

  # Report the max gradient norm for debugging.
  if report_summary:
    summary.scalar("global_norm/adaptive_max_gradient_norm", max_norm)

  # The factor is 1. whenever the norm is smaller than max_norm.
  factor = math_ops.select(norm < max_norm,
                           array_ops.ones_like(norm),
                           math_ops.exp(log_mean) / norm)

  if static_max_norm is not None:
    factor = math_ops.minimum(static_max_norm / norm, factor)

  def _scale(grad):
    """Applies `factor` to one gradient, keeping its representation."""
    if grad is None:
      return None
    if isinstance(grad, ops.IndexedSlices):
      return ops.IndexedSlices(
          grad.values * factor, grad.indices, grad.dense_shape)
    return grad * factor

  clipped_grads = [_scale(grad) for grad in grads]
  return list(zip(clipped_grads, variables))
def variance(self, name="variance"):
  """Computes the variance of each batch member.

  Variance for inverse gamma is defined only for `alpha > 2`. If
  `self.strict_statistics` is `True`, an assertion requiring `alpha > 2` is
  attached to the result, so an exception is raised rather than returning
  `NaN`; otherwise the remaining entries are `NaN`.

  Args:
    name: A name to give this op.

  Returns:
    The variance for every batch member, a `Tensor` with same `dtype` as self.
  """
  alpha = self._alpha
  beta = self._beta
  with ops.name_scope(self.name), ops.op_scope([alpha, beta], name):
    numerator = math_ops.square(self._beta)
    denominator = (
        math_ops.square(self._alpha - 1.0) * (self._alpha - 2.0))
    variance_value = numerator / denominator

    if not self.strict_statistics:
      is_defined = alpha > 2.0
      nan_fill = np.nan * self._ones()
      return math_ops.select(is_defined, variance_value, nan_fill)

    two = ops.convert_to_tensor(2.0, dtype=self.dtype)
    alpha_check = check_ops.assert_less(two, alpha)
    return control_flow_ops.with_dependencies([alpha_check], variance_value)
def mean(self, name="mean"):
  """Computes the mean of the distribution.

  The mean of Student's T equals `mu` if `df > 1`, otherwise it is `NaN`.
  If `self.allow_nan_stats=False`, an assertion requiring `df > 1` is
  attached to the result, so an exception is raised rather than returning
  `NaN`.

  Args:
    name: A name to give this op.

  Returns:
    The mean for every batch member, a `Tensor` with same `dtype` as self.
  """
  with ops.name_scope(self.name), ops.name_scope(name, values=[self._mu]):
    mean_value = self._mu * self._ones()

    if not self.allow_nan_stats:
      one = constant_op.constant(1.0, dtype=self.dtype)
      df_check = check_ops.assert_less(
          one, self._df,
          message="mean not defined for components of df <= 1")
      return control_flow_ops.with_dependencies([df_check], mean_value)

    is_defined = self._df > self._ones()
    nan_fill = np.nan + self._zeros()
    return math_ops.select(is_defined, mean_value, nan_fill)
def mode(self, name="mode"):
  """Mode of each batch member.

  The mode of a gamma distribution is `(alpha - 1) / beta` when `alpha > 1`,
  and `NaN` otherwise. If `self.allow_nan_stats` is `False`, an assertion
  requiring `alpha > 1` is attached to the result, so an exception is raised
  rather than returning `NaN`.

  Args:
    name: A name to give this op.

  Returns:
    The mode for every batch member, a `Tensor` with same `dtype` as self.
  """
  alpha = self._alpha
  beta = self._beta
  with ops.name_scope(self.name):
    with ops.name_scope(name, values=[alpha, beta]):
      mode_if_defined = (alpha - 1.0) / beta
      if self.allow_nan_stats:
        # Strict inequality: the documented contract and the assertion in
        # the other branch both treat `alpha == 1` as undefined, while this
        # branch used to accept it via `>=`.
        alpha_gt_1 = alpha > 1.0
        nan = np.nan * self._ones()
        return math_ops.select(alpha_gt_1, mode_if_defined, nan)
      else:
        one = constant_op.constant(1.0, dtype=self.dtype)
        return control_flow_ops.with_dependencies(
            [check_ops.assert_less(
                one, alpha,
                message="mode not defined for components of alpha <= 1"
            )], mode_if_defined)
def _Solve(a, b, c):
  """Returns the solution of a quadratic minimization.

  The optimization equation is:
       f(a, b, c) = argmin_w{1/2 * a * w^2 + b * w + c * |w|}
  whose optimal solution w* is:
       w* = -(b - sign(b)*c)/a if |b| > c else w* = 0

  REQUIRES: Dimensionality of a and b must be same

  Args:
    a: A Tensor
    b: A Tensor
    c: A Tensor with one element.

  Returns:
    A Tensor w, which is solution for the equation
  """
  with ops.name_scope("solve_" + b.op.name):
    c = ops.convert_to_tensor(c)
    b_shape = array_ops.shape(b)
    # `c` expanded to the shape of `b` for the element-wise comparison.
    threshold = array_ops.fill(b_shape, c)
    zeros = array_ops.zeros(array_ops.shape(b), dtype=b.dtype)
    candidate = (c * math_ops.sign(b) - b) / a
    is_small = math_ops.less(math_ops.abs(b), threshold)
    return math_ops.select(is_small, zeros, candidate)
def mean(self, name="mean"):
  """Returns the mean of the distribution.

  The mean of Student's T equals `mu` if `df > 1`, otherwise it is `NaN`.
  If `self.allow_nan_stats=False`, an assertion requiring `df > 1` is
  attached to the result, so an exception is raised rather than returning
  `NaN`.

  Args:
    name: A name to give this op.

  Returns:
    The mean for every batch member, a `Tensor` with same `dtype` as self.
  """
  with ops.name_scope(self.name):
    with ops.name_scope(name, values=[self._mu]):
      result = self._mu * self._ones()

      if self.allow_nan_stats:
        defined_mask = self._df > self._ones()
        undefined_value = np.nan + self._zeros()
        return math_ops.select(defined_mask, result, undefined_value)

      one = constant_op.constant(1.0, dtype=self.dtype)
      guard = check_ops.assert_less(
          one, self._df,
          message="mean not defined for components of df <= 1")
      return control_flow_ops.with_dependencies([guard], result)
def _MaximumMinimumGrad(op, grad, selector_op):
  """Shared gradient code for Maximum and Minimum.

  Args:
    op: the op being differentiated; its two inputs are `x` and `y`.
    grad: the incoming gradient.
    selector_op: callable `(x, y) -> boolean mask` marking the entries where
      the gradient flows to `x`; the remaining entries flow to `y`.

  Returns:
    A `(gx, gy)` tuple of gradients, reshaped to the shapes of `x` and `y`.
  """
  x, y = op.inputs[0], op.inputs[1]
  x_shape = array_ops.shape(x)
  y_shape = array_ops.shape(y)
  zeros = array_ops.zeros(array_ops.shape(grad), grad.dtype)

  routes_to_x = selector_op(x, y)
  # Axes over which each gradient is summed before the reshape.
  x_axes, y_axes = gen_array_ops._broadcast_gradient_args(x_shape, y_shape)

  x_part = math_ops.select(routes_to_x, grad, zeros)
  y_part = math_ops.select(math_ops.logical_not(routes_to_x), grad, zeros)

  gx = array_ops.reshape(math_ops.reduce_sum(x_part, x_axes), x_shape)
  gy = array_ops.reshape(math_ops.reduce_sum(y_part, y_axes), y_shape)
  return (gx, gy)
def mean(self, name="mean"):
  """Computes the mean of the distribution.

  The mean of Student's T equals `mu` if `df > 1`, otherwise it is `NaN`.
  If `self.strict_statistics=True`, an assertion requiring `df > 1` is
  attached to the result, so an exception is raised rather than returning
  `NaN`.

  Args:
    name: A name to give this op.

  Returns:
    The mean for every batch member, a `Tensor` with same `dtype` as self.
  """
  with ops.name_scope(self.name), ops.op_scope([self._mu], name):
    mean_value = self._mu * self._ones()

    if not self.strict_statistics:
      is_defined = self._df > self._ones()
      nan_fill = np.nan + self._zeros()
      return math_ops.select(is_defined, mean_value, nan_fill)

    one = ops.convert_to_tensor(1.0, dtype=self.dtype)
    df_check = check_ops.assert_less(one, self._df)
    return control_flow_ops.with_dependencies([df_check], mean_value)
def mode(self, name="mode"):
  """Computes the mode of the distribution.

  The mode of the Dirichlet distribution is only defined when `alpha > 1`.
  This returns the mode when `alpha > 1`, and NaN otherwise. If
  `self.allow_nan_stats` is `False`, an assertion requiring `alpha > 1` is
  attached to the result, so an exception is raised rather than returning
  `NaN`.

  Args:
    name: The name for this op.

  Returns:
    Mode of the Dirichlet distribution.
  """
  scope_values = [self._alpha, self._alpha_0]
  with ops.name_scope(self.name), ops.op_scope(scope_values, name):
    one = constant_op.constant(1, self.dtype)
    # (alpha - 1) / (alpha_0 - k), with k the first entry of event_shape().
    numerator = self._alpha - 1
    denominator = (
        array_ops.expand_dims(self._alpha_0, -1) -
        math_ops.cast(self.event_shape()[0], self.dtype))
    mode_value = numerator / denominator

    if not self.allow_nan_stats:
      alpha_check = check_ops.assert_less(
          one, self._alpha,
          message="mode not defined for components of alpha <= 1")
      return control_flow_ops.with_dependencies([alpha_check], mode_value)

    is_defined = math_ops.greater(self._alpha, 1)
    nan_fill = (constant_op.constant(float("NaN"), dtype=self.dtype) *
                array_ops.ones_like(self._alpha, dtype=self.dtype))
    return math_ops.select(is_defined, mode_value, nan_fill)
def mode(self, name="mode"):
  """Returns the mode of the distribution.

  The mode of the Beta distribution is only defined when `a > 1` and
  `b > 1`; it is `(a - 1) / (a + b - 2)` there and NaN otherwise. If
  `self.allow_nan_stats` is `False`, assertions requiring `a > 1` and
  `b > 1` are attached to the result, so an exception is raised rather than
  returning `NaN`.

  Args:
    name: The name for this op.

  Returns:
    Mode of the Beta distribution.
  """
  with ops.name_scope(self.name):
    with ops.op_scope([self._a, self._b, self._a_b_sum], name):
      a, b, a_b_sum = self._a, self._b, self._a_b_sum
      one = constant_op.constant(1, self.dtype)
      result = (a - 1) / (a_b_sum - 2)

      if self.allow_nan_stats:
        a_gt_1 = math_ops.greater(a, 1)
        b_gt_1 = math_ops.greater(b, 1)
        defined_mask = math_ops.logical_and(a_gt_1, b_gt_1)
        undefined_value = (
            constant_op.constant(float("NaN"), dtype=self.dtype) *
            array_ops.ones_like(a_b_sum, dtype=self.dtype))
        return math_ops.select(defined_mask, result, undefined_value)

      guards = [
          check_ops.assert_less(one, a),
          check_ops.assert_less(one, b),
      ]
      return control_flow_ops.with_dependencies(guards, result)
def mode(self, name="mode"):
  """Returns the mode of the distribution.

  The mode of the Dirichlet distribution is only defined when `alpha > 1`.
  This returns the mode when `alpha > 1`, and NaN otherwise. If
  `self.allow_nan_stats` is `False`, an assertion requiring `alpha > 1` is
  attached to the result, so an exception is raised rather than returning
  `NaN`.

  Args:
    name: The name for this op.

  Returns:
    Mode of the Dirichlet distribution.
  """
  with ops.name_scope(self.name):
    with ops.name_scope(name, values=[self._alpha, self._alpha_0]):
      one = constant_op.constant(1, self.dtype)
      # (alpha - 1) / (alpha_0 - k), with k the first entry of event_shape().
      shifted_alpha = self._alpha - 1
      normalizer = (
          array_ops.expand_dims(self._alpha_0, -1) -
          math_ops.cast(self.event_shape()[0], self.dtype))
      result = shifted_alpha / normalizer

      if self.allow_nan_stats:
        defined_mask = math_ops.greater(self._alpha, 1)
        undefined_value = (
            constant_op.constant(float("NaN"), dtype=self.dtype) *
            array_ops.ones_like(self._alpha, dtype=self.dtype))
        return math_ops.select(defined_mask, result, undefined_value)

      guard = check_ops.assert_less(
          one, self._alpha,
          message="mode not defined for components of alpha <= 1")
      return control_flow_ops.with_dependencies([guard], result)
def mode(self, name="mode"):
    """Mode of each batch member.

    The mode of a gamma distribution is `(alpha - 1) / beta` when
    `alpha > 1`, and `NaN` otherwise. If `self.strict_statistics` is `True`,
    an exception will be raised rather than returning `NaN`.

    Args:
      name: A name to give this op.

    Returns:
      The mode for every batch member, a `Tensor` with same `dtype` as self.
    """
    alpha = self._alpha
    beta = self._beta
    with ops.name_scope(self.name):
        with ops.op_scope([alpha, beta], name):
            mode_if_defined = (alpha - 1.0) / beta
            if self.strict_statistics:
                one = ops.convert_to_tensor(1.0, dtype=self.dtype)
                return control_flow_ops.with_dependencies(
                    [check_ops.assert_less(one, alpha)], mode_if_defined)
            # Use a strict inequality so this branch agrees with both the
            # documented contract (`alpha > 1`) and the strict-statistics
            # assertion above (`1 < alpha`); previously `alpha == 1` slipped
            # through here while being rejected in strict mode.
            alpha_gt_1 = alpha > 1.0
            nan = np.nan * self._ones()
            return math_ops.select(alpha_gt_1, mode_if_defined, nan)
def variance(self, name="variance"):
    """Variance of the distribution.

    The value reported per batch member is

    ```
    sigma**2 * df / (df - 2),  when df > 2
    infinity,                  when 1 < df <= 2
    NaN,                       when df <= 1
    ```

    The NaN state occurs because the mean is undefined for `df <= 1`.

    Args:
      name: A name for this op.

    Returns:
      The variance for every batch member, broadcast against
      `self._zeros()`.
    """
    with ops.name_scope(self.name):
        with ops.op_scope([self._df, self._sigma], name):
            # Adding `self._zeros()` broadcasts each term to a common shape
            # so that `select` receives matching operands.
            broadcast_df = self._zeros() + self._df
            result_where_finite = (
                self._zeros()
                + math_ops.square(self._sigma) * self._df / (self._df - 2))
            # When 1 < df <= 2 the variance is infinite, not NaN.
            result_where_defined = math_ops.select(
                broadcast_df > 2,
                result_where_finite,
                self._zeros() + np.inf)
            # Only df <= 1 is genuinely undefined.
            return math_ops.select(
                broadcast_df > 1,
                result_where_defined,
                self._zeros() + np.nan)
def variance(self, name="var"):
    """Variance of the distribution.

    The value reported per batch member is

    ```
    sigma**2 * df / (df - 2),  when df > 2
    infinity,                  when 1 < df <= 2
    NaN,                       when df <= 1
    ```

    The NaN state occurs because the mean is undefined for `df <= 1`.

    Args:
      name: A name for the returned op.

    Returns:
      The variance for every batch member, broadcast against
      `self._zeros()`.
    """
    with ops.name_scope(self.name):
        # Adding `self._zeros()` broadcasts each term to a common shape so
        # that `select` receives matching operands.
        broadcast_df = self._zeros() + self._df
        result_where_finite = (
            self._zeros()
            + math_ops.square(self._sigma) * self._df / (self._df - 2))
        # When 1 < df <= 2 the variance is infinite.
        result_where_defined = math_ops.select(
            broadcast_df > 2,
            result_where_finite,
            self._zeros() + np.inf)
        # For df <= 1 the variance is undefined; previously this case was
        # reported as infinity as well.
        return math_ops.select(
            broadcast_df > 1,
            result_where_defined,
            self._zeros() + np.nan,
            name=name)
def _SegmentMaxGrad(op, grad):
    """Gradient for SegmentMax.

    The incoming gradient and the op's output are both gathered with the
    op's second input. Positions where the first input is strictly less
    than the gathered output receive a zero gradient; all other positions
    receive the gathered gradient.

    Args:
      op: The op being differentiated; `op.inputs[0]`, `op.inputs[1]` and
        `op.outputs[0]` are read.
      grad: Gradient with respect to the op's output.

    Returns:
      A pair `(gradient for op.inputs[0], None)`.
    """
    data = op.inputs[0]
    indices = op.inputs[1]

    no_grad = array_ops.zeros(array_ops.shape(data), dtype=data.dtype)
    grad_per_entry = array_ops.gather(grad, indices)
    output_per_entry = array_ops.gather(op.outputs[0], indices)

    below_output = math_ops.less(data, output_per_entry)
    data_grad = math_ops.select(below_output, no_grad, grad_per_entry)
    return data_grad, None
def _pick_scalar_condition(pred, cond_true, cond_false):
    """Chooses between two conditions based on a predicate.

    If the predicate's value can be determined statically, the matching
    argument is returned directly; otherwise the choice is deferred to a
    `select` op.

    Note: this is only valid when `pred`, `cond_true` and `cond_false` are
    all scalars, so its semantics are arguably closer to tf.cond than to
    tf.select, even though tf.select is what implements it.

    Args:
      pred: Scalar predicate.
      cond_true: Value chosen when `pred` is true.
      cond_false: Value chosen when `pred` is false.

    Returns:
      `cond_true` or `cond_false`, or a `select` op choosing between them.
    """
    static_pred = tensor_util.constant_value(pred)
    if static_pred is not None:
        if static_pred:
            return cond_true
        return cond_false
    return math_ops.select(pred, cond_true, cond_false)
def _PowGrad(op, grad):
    """Returns grad * (y*x^(y-1), z*log(x)).

    Each partial is summed over the axes reported by
    `_broadcast_gradient_args` and reshaped to its input's shape.

    Args:
      op: The pow op; `op.inputs[0]` is `x`, `op.inputs[1]` is `y` and
        `op.outputs[0]` is `z`.
      grad: Gradient with respect to `z`.

    Returns:
      A pair `(gradient for x, gradient for y)`.
    """
    x, y = op.inputs[0], op.inputs[1]
    z = op.outputs[0]
    x_shape = array_ops.shape(x)
    y_shape = array_ops.shape(y)
    x_axes, y_axes = gen_array_ops._broadcast_gradient_args(x_shape, y_shape)

    x_partial = grad * y * math_ops.pow(x, y - 1)
    gx = array_ops.reshape(math_ops.reduce_sum(x_partial, x_axes), x_shape)

    # Avoid a false singularity at x = 0.
    if x.dtype.is_complex:
        # real(x) < 0 is fine for the complex case.
        log_is_usable = math_ops.not_equal(x, 0)
    else:
        # There's no sensible real value to return if x < 0, so return 0.
        log_is_usable = x > 0
    log_x = math_ops.select(
        log_is_usable, math_ops.log(x), array_ops.zeros_like(x))

    y_partial = grad * z * log_x
    gy = array_ops.reshape(math_ops.reduce_sum(y_partial, y_axes), y_shape)
    return gx, gy
def _mode(self):
    """Mode, computed as a scalar factor times the dense scale matrix.

    The factor is `df - dimension - 1`, replaced by NaN wherever it is
    negative. When `self.cholesky_input_output_matrices` is set, the square
    root of the factor multiplies `scale_operator_pd.sqrt_to_dense()`;
    otherwise the factor multiplies `scale_operator_pd.to_dense()`.
    """
    factor = self.df - self.dimension - 1.
    is_negative = math_ops.less(factor, 0.)
    nan = constant_op.constant(float("NaN"), dtype=self.dtype, name="nan")
    factor = math_ops.select(is_negative, nan, factor)

    if not self.cholesky_input_output_matrices:
        return factor * self.scale_operator_pd.to_dense()
    return math_ops.sqrt(factor) * self.scale_operator_pd.sqrt_to_dense()
def _safe_mean(losses, num_present):
    """Computes a safe mean of the losses.

    The summed losses are divided by `num_present`. The divisor is swapped
    for 1.0 wherever `num_present` equals zero so the division itself never
    divides by zero, and the final result is zero unless `num_present` is
    greater than zero.

    Args:
      losses: A tensor whose elements contain individual loss measurements.
      num_present: The number of measurable losses in the tensor.

    Returns:
      A scalar representing the mean of the losses. If `num_present` is
      zero, then zero is returned.
    """
    total_loss = math_ops.reduce_sum(losses)

    any_present = math_ops.greater(num_present, 0)
    # Keep the denominator non-zero even on the branch that is discarded.
    safe_divisor = math_ops.select(
        math_ops.equal(num_present, 0), 1.0, num_present)
    mean_loss = math_ops.div(total_loss, safe_divisor)
    fallback = array_ops.zeros_like(total_loss)

    return math_ops.select(any_present, mean_loss, fallback, name="value")
def _sample_n(self, n, seed=None):
    """Draws `n` samples from the base distribution and snaps them up.

    Samples are rounded up with `ceil`, then limited to the range given by
    `self._lower_cutoff` and `self._upper_cutoff`; a cutoff that is `None`
    is not applied.

    Args:
      n: Number of samples, converted to a tensor named "n".
      seed: Seed forwarded to the base distribution's `sample_n`.

    Returns:
      The transformed samples.
    """
    low = self._lower_cutoff
    high = self._upper_cutoff
    with ops.name_scope("transform"):
        n = ops.convert_to_tensor(n, name="n")
        raw_samples = self.base_distribution.sample_n(n=n, seed=seed)
        ones = array_ops.ones_like(raw_samples)

        # Snap values to the intervals (j - 1, j].
        samples = math_ops.ceil(raw_samples)

        if low is not None:
            too_small = samples < low
            samples = math_ops.select(too_small, low * ones, samples)

        if high is not None:
            too_large = samples > high
            samples = math_ops.select(too_large, high * ones, samples)

        return samples
def _safe_mean(losses, num_present):
    """Computes a safe mean of the losses.

    Divides the sum of `losses` by `num_present`, substituting 1.0 for the
    divisor wherever `num_present` equals zero, and yields zero unless
    `num_present` is greater than zero.

    Args:
      losses: A tensor whose elements contain individual loss measurements.
      num_present: The number of measurable losses in the tensor.

    Returns:
      A scalar representing the mean of the losses. If `num_present` is
      zero, then zero is returned.
    """
    loss_sum = math_ops.reduce_sum(losses)
    is_nonempty = math_ops.greater(num_present, 0)

    # Guard the division: a zero count is replaced by 1.0 before dividing.
    count_is_zero = math_ops.equal(num_present, 0)
    divisor = math_ops.select(count_is_zero, 1.0, num_present)
    quotient = math_ops.div(loss_sum, divisor)

    return math_ops.select(
        is_nonempty,
        quotient,
        array_ops.zeros_like(loss_sum),
        name="value")
def variance(self, name="variance"):
    """Variance of the distribution.

    The value reported per batch member is

    ```
    sigma**2 * df / (df - 2),  when df > 2
    infinity,                  when 1 < df <= 2
    NaN,                       when df <= 1
    ```

    The NaN state occurs because mean is undefined for `df <= 1`, and if
    `self.allow_nan_stats` is `False`, an exception will be raised if any
    batch members fall into this state.

    Args:
      name: A name for this op.

    Returns:
      The variance for every batch member, a `Tensor` with same `dtype` as
      self.
    """
    with ops.name_scope(self.name), ops.name_scope(
            name, values=[self._df, self._sigma]):
        # Adding `self._zeros()` broadcasts each term to a common shape.
        finite_variance = (
            self._zeros()
            + math_ops.square(self._sigma) * self._df / (self._df - 2))

        # When 1 < df <= 2, variance is infinite.
        df_above_two = self._zeros() + self._df > 2
        infinite_variance = self._zeros() + np.inf
        defined_variance = math_ops.select(
            df_above_two, finite_variance, infinite_variance)

        if not self.allow_nan_stats:
            one = constant_op.constant(1.0, dtype=self.dtype)
            df_check = check_ops.assert_less(
                one, self._df,
                message="variance not defined for components of df <= 1")
            return control_flow_ops.with_dependencies(
                [df_check], defined_variance)

        # For df <= 1 the variance is undefined, so report NaN there.
        df_above_one = self._zeros() + self._df > 1
        undefined_variance = self._zeros() + np.nan
        return math_ops.select(
            df_above_one, defined_variance, undefined_variance)