def call(self, inputs, count_weights=None):
  if isinstance(inputs, (list, np.ndarray)):
    inputs = tf.convert_to_tensor(inputs)
  if inputs.shape.rank == 1:
    inputs = tf.compat.v1.expand_dims(inputs, 1)

  if count_weights is not None and self.output_mode != COUNT:
    raise ValueError(
        "`count_weights` is not used when `output_mode` is not `'count'`. "
        "Received `count_weights={}`.".format(count_weights))

  out_depth = self.num_tokens
  multi_hot_output = (self.output_mode == MULTI_HOT)
  if isinstance(inputs, tf.SparseTensor):
    max_value = tf.reduce_max(inputs.values)
    min_value = tf.reduce_min(inputs.values)
  else:
    max_value = tf.reduce_max(inputs)
    min_value = tf.reduce_min(inputs)
  condition = tf.logical_and(
      tf.greater(tf.cast(out_depth, max_value.dtype), max_value),
      tf.greater_equal(min_value, tf.cast(0, min_value.dtype)))
  # Keep a handle on the assertion so it actually executes in graph mode.
  assertion = tf.Assert(condition, [
      "Input values must be in the range 0 <= values < num_tokens"
      " with num_tokens={}".format(out_depth)
  ])
  with tf.control_dependencies([assertion]):
    if self.sparse:
      return sparse_bincount(inputs, out_depth, multi_hot_output,
                             count_weights)
    else:
      return dense_bincount(inputs, out_depth, multi_hot_output,
                            count_weights)
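# A hedged sketch (not part of the source) of the encoding this layer's range
# check guards. `sparse_bincount`/`dense_bincount` are helpers not shown here;
# for dense count/multi-hot output the core operation is a per-row bincount
# over `num_tokens` bins, which `tf.math.bincount` exposes directly.
import tensorflow as tf

inputs = tf.constant([[0, 1, 1], [2, 3, 3]])
counts = tf.math.bincount(inputs, minlength=4, axis=-1)
print(counts.numpy())  # [[1 2 0 0], [0 0 1 2]]; clipping counts to 1 gives multi-hot.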
def call(self, inputs, count_weights=None):
  inputs = utils.ensure_tensor(inputs)

  if count_weights is not None:
    if self.output_mode != COUNT:
      raise ValueError(
          "`count_weights` is not used when `output_mode` is not `'count'`. "
          "Received `count_weights={}`.".format(count_weights))
    count_weights = utils.ensure_tensor(count_weights, self.compute_dtype)

  depth = self.num_tokens
  if isinstance(inputs, tf.SparseTensor):
    max_value = tf.reduce_max(inputs.values)
    min_value = tf.reduce_min(inputs.values)
  else:
    max_value = tf.reduce_max(inputs)
    min_value = tf.reduce_min(inputs)
  condition = tf.logical_and(
      tf.greater(tf.cast(depth, max_value.dtype), max_value),
      tf.greater_equal(min_value, tf.cast(0, min_value.dtype)))
  assertion = tf.Assert(condition, [
      "Input values must be in the range 0 <= values < num_tokens"
      " with num_tokens={}".format(depth)
  ])
  with tf.control_dependencies([assertion]):
    return utils.encode_categorical_inputs(
        inputs,
        output_mode=self.output_mode,
        depth=depth,
        dtype=self.compute_dtype,
        sparse=self.sparse,
        count_weights=count_weights)
def matrix_rank(a, tol=None, validate_args=False, name=None):
  """Compute the matrix rank; the number of non-zero SVD singular values.

  Args:
    a: (Batch of) `float`-like matrix-shaped `Tensor`(s) whose rank is to be
      computed.
    tol: Threshold below which the singular value is counted as 'zero'.
      Default value: `None` (i.e., `eps * max(rows, cols) * max(singular_val)`).
    validate_args: When `True`, additional assertions might be embedded in the
      graph.
      Default value: `False` (i.e., no graph assertions are added).
    name: Python `str` prefixed to ops created by this function.
      Default value: 'matrix_rank'.

  Returns:
    matrix_rank: (Batch of) `int32` scalars representing the number of non-zero
      singular values.
  """
  with tf.name_scope(name or 'matrix_rank'):
    a = tf.convert_to_tensor(a, dtype_hint=tf.float32, name='a')
    assertions = _maybe_validate_matrix(a, validate_args)
    if assertions:
      with tf.control_dependencies(assertions):
        a = tf.identity(a)
    s = tf.linalg.svd(a, compute_uv=False)
    if tol is None:
      if tensorshape_util.is_fully_defined(a.shape[-2:]):
        m = np.max(a.shape[-2:].as_list())
      else:
        m = tf.reduce_max(tf.shape(a)[-2:])
      eps = np.finfo(dtype_util.as_numpy_dtype(a.dtype)).eps
      tol = (eps * tf.cast(m, a.dtype)
             * tf.reduce_max(s, axis=-1, keepdims=True))
    return tf.reduce_sum(tf.cast(s > tol, tf.int32), axis=-1)
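# A hedged usage sketch: counting singular values above the tolerance recovers
# the rank of a rank-deficient matrix. Recent TF releases expose the same
# computation as tf.linalg.matrix_rank, used here so the sketch is standalone.
import tensorflow as tf

a = tf.constant([[1., 2.], [2., 4.]])  # second row is 2x the first
print(int(tf.linalg.matrix_rank(a)))  # 1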
def coverage_box(bboxes):
  y_min, x_min, y_max, x_max = tf.split(
      value=bboxes, num_or_size_splits=4, axis=1)
  y_min_coverage = tf.reduce_min(y_min, axis=0)
  x_min_coverage = tf.reduce_min(x_min, axis=0)
  y_max_coverage = tf.reduce_max(y_max, axis=0)
  x_max_coverage = tf.reduce_max(x_max, axis=0)
  return tf.stack(
      [y_min_coverage, x_min_coverage, y_max_coverage, x_max_coverage],
      axis=1)
def get_discriminator_batch_loss(learner_agent_output, env_output,
                                 unused_actor_agent_output,
                                 unused_actor_action, unused_reward_clipping,
                                 unused_discounting, unused_baseline_cost,
                                 unused_entropy_cost, num_steps):
  """Discriminator batch softmax loss with mask."""
  # Remove the time_step dimension for each tensor in the result.
  learner_agent_output = tf.nest.map_structure(
      lambda t: tf.squeeze(t, axis=0), learner_agent_output)
  result = learner_agent_output.policy_logits  # dict

  # Compute softmax.
  # Use stable softmax: softmax(x) = softmax(x+c) for any constant c.
  # Here we use constant c = -max(x).
  # Shape of similarity and similarity_mask: [batch, batch].
  row_max = tf.reduce_max(result['similarity'], axis=1, keepdims=True)
  masked_row_exp = tf.exp(result['similarity'] - row_max) * tf.cast(
      result['similarity_mask'], tf.float32)
  summed_rows = tf.reduce_sum(masked_row_exp, axis=1)  # Shape=[batch]
  # log(softmax_i). Shape = [batch]
  loss_by_row = -(tf.linalg.diag_part(result['similarity']) -
                  tf.squeeze(row_max, 1)) + tf.math.log(summed_rows)
  loss_by_row = loss_by_row * result['labels']

  col_max = tf.reduce_max(result['similarity'], axis=0, keepdims=True)
  masked_col_exp = tf.exp(result['similarity'] - col_max) * tf.cast(
      result['similarity_mask'], tf.float32)
  summed_cols = tf.reduce_sum(masked_col_exp, axis=0)  # Shape=[batch]
  tf.debugging.assert_equal(summed_cols.shape, summed_rows.shape)
  # log(softmax_j). Shape = [batch]
  loss_by_col = -(tf.linalg.diag_part(result['similarity']) -
                  tf.squeeze(col_max, 0)) + tf.math.log(summed_cols)
  loss_by_col = loss_by_col * result['labels']

  # Shape = [batch]
  loss = (loss_by_row + loss_by_col) / 2.0
  tf.summary.scalar('loss/batch_softmax', tf.reduce_mean(loss),
                    step=num_steps)
  tf.summary.scalar('labels/num_positive_labels',
                    tf.reduce_sum(result['labels']), step=num_steps)
  tf.summary.scalar('labels/batch_loss_positive_label_ratio',
                    tf.reduce_mean(result['labels']), step=num_steps)

  # Add classification loss if set in FLAGS. Shape = [batch].
  if FLAGS.use_batch_and_ce_losses:
    classification_loss = get_discriminator_focal_loss(
        learner_agent_output, env_output, unused_actor_agent_output,
        unused_actor_action, unused_reward_clipping, unused_discounting,
        unused_baseline_cost, unused_entropy_cost, num_steps)
    # Shape = [batch].
    loss = classification_loss + loss * FLAGS.disc_batch_loss_scale
  return loss
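# A minimal standalone sketch (not part of the source) of the stable-softmax
# identity used above: softmax(x) == softmax(x + c) for any constant c, so
# shifting by c = -max(x) prevents overflow in tf.exp without changing the
# result. Values below are made up to force the naive route to overflow.
import tensorflow as tf

x = tf.constant([1000.0, 1001.0, 1002.0])
naive = tf.exp(x) / tf.reduce_sum(tf.exp(x))        # -> [nan nan nan]
shifted = x - tf.reduce_max(x)
stable = tf.exp(shifted) / tf.reduce_sum(tf.exp(shifted))
print(naive.numpy(), stable.numpy())                # stable ~ [0.09 0.24 0.67]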
def _match_when_rows_are_non_empty():
  """Performs matching when the rows of similarity matrix are non empty.

  Returns:
    matches: int32 tensor indicating the row each column matches to.
  """
  # Matches for each column
  matches = tf.argmax(input=similarity_matrix, axis=0, output_type=tf.int32)

  # Deal with matched and unmatched threshold
  if self._matched_threshold is not None:
    # Get logical indices of ignored and unmatched columns as tf.int64
    matched_vals = tf.reduce_max(input_tensor=similarity_matrix, axis=0)
    below_unmatched_threshold = tf.greater(self._unmatched_threshold,
                                           matched_vals)
    between_thresholds = tf.logical_and(
        tf.greater_equal(matched_vals, self._unmatched_threshold),
        tf.greater(self._matched_threshold, matched_vals))

    if self._negatives_lower_than_unmatched:
      matches = self._set_values_using_indicator(
          matches, below_unmatched_threshold, -1)
      matches = self._set_values_using_indicator(
          matches, between_thresholds, -2)
    else:
      matches = self._set_values_using_indicator(
          matches, below_unmatched_threshold, -2)
      matches = self._set_values_using_indicator(
          matches, between_thresholds, -1)

  if self._force_match_for_each_row:
    similarity_matrix_shape = shape_utils.combined_static_and_dynamic_shape(
        similarity_matrix)
    force_match_column_ids = tf.argmax(input=similarity_matrix, axis=1,
                                       output_type=tf.int32)
    force_match_column_indicators = tf.one_hot(
        force_match_column_ids, depth=similarity_matrix_shape[1])
    force_match_row_ids = tf.argmax(input=force_match_column_indicators,
                                    axis=0, output_type=tf.int32)
    force_match_column_mask = tf.cast(
        tf.reduce_max(input_tensor=force_match_column_indicators, axis=0),
        tf.bool)
    final_matches = tf.where(force_match_column_mask, force_match_row_ids,
                             matches)
    return final_matches
  else:
    return matches
def call(self, inputs, count_weights=None):
  if isinstance(inputs, (list, np.ndarray)):
    inputs = tf.convert_to_tensor(inputs)

  def expand_dims(inputs, axis):
    if tf_utils.is_sparse(inputs):
      return tf.sparse.expand_dims(inputs, axis)
    else:
      return tf.compat.v1.expand_dims(inputs, axis)

  original_shape = inputs.shape
  # In all cases, we should uprank scalar input to a single sample.
  if inputs.shape.rank == 0:
    inputs = expand_dims(inputs, -1)
  # One hot will uprank only if the final output dimension is not already 1.
  if self.output_mode == ONE_HOT:
    if inputs.shape[-1] != 1:
      inputs = expand_dims(inputs, -1)

  # TODO(b/190445202): remove output rank restriction.
  if inputs.shape.rank > 2:
    raise ValueError(
        "Received input shape {}, which would result in output rank {}. "
        "Currently only outputs up to rank 2 are supported.".format(
            original_shape, inputs.shape.rank))

  if count_weights is not None and self.output_mode != COUNT:
    raise ValueError(
        "`count_weights` is not used when `output_mode` is not `'count'`. "
        "Received `count_weights={}`.".format(count_weights))

  out_depth = self.num_tokens
  binary_output = self.output_mode in (MULTI_HOT, ONE_HOT)
  if isinstance(inputs, tf.SparseTensor):
    max_value = tf.reduce_max(inputs.values)
    min_value = tf.reduce_min(inputs.values)
  else:
    max_value = tf.reduce_max(inputs)
    min_value = tf.reduce_min(inputs)
  condition = tf.logical_and(
      tf.greater(tf.cast(out_depth, max_value.dtype), max_value),
      tf.greater_equal(min_value, tf.cast(0, min_value.dtype)))
  assertion = tf.Assert(condition, [
      "Input values must be in the range 0 <= values < num_tokens"
      " with num_tokens={}".format(out_depth)
  ])
  with tf.control_dependencies([assertion]):
    if self.sparse:
      return sparse_bincount(inputs, out_depth, binary_output,
                             count_weights)
    else:
      return dense_bincount(inputs, out_depth, binary_output,
                            count_weights)
def prepare_conv_args(filter_shape,
                      rank,
                      strides,
                      padding,
                      dilations,
                      is_transpose=False,
                      validate_args=False):
  """Sanitizes user-provided input."""
  try:
    rank = int(tf.get_static_value(rank))
  except TypeError:
    raise TypeError('Argument `rank` must be statically known `int`.')
  valid_rank = {1, 2, 3}
  if rank not in valid_rank:
    raise ValueError('Argument `rank` must be in {}.'.format(valid_rank))

  filter_shape = prepare_tuple_argument(
      filter_shape, n=rank, arg_name='filter_shape',
      validate_args=validate_args)
  strides = prepare_tuple_argument(
      strides, n=rank, arg_name='strides', validate_args=validate_args)
  padding = _prepare_padding_argument(padding)
  dilations = prepare_tuple_argument(
      dilations, n=rank, arg_name='dilations', validate_args=validate_args)

  strides_ = [tf.get_static_value(s) for s in strides]
  dilations_ = [tf.get_static_value(d) for d in dilations]
  assertions = []
  if is_transpose:
    if (all(s is not None for s in strides_)
        and all(d is not None for d in dilations_)):
      if any(s > 1 for s in strides_) and any(d > 1 for d in dilations_):
        raise NotImplementedError(
            'At least one of `dilations` and `strides` '
            'must equal `1` for each dimension. Saw: '
            '`strides={}`, `dilations={}`'.format(strides, dilations))
    elif validate_args:
      assertions.append(
          assert_util.assert_equal(
              tf.logical_or(
                  tf.equal(tf.reduce_max(strides), 1),
                  tf.equal(tf.reduce_max(dilations), 1)),
              True,
              message='At least one of `dilations` and `strides` must equal '
                      '`1` for each dimension.'))

  with tf.control_dependencies(assertions):
    return filter_shape, rank, strides, padding, dilations
def _self_suppression(iou, _, iou_sum):
  batch_size = tf.shape(iou)[0]
  can_suppress_others = tf.cast(
      tf.reshape(tf.reduce_max(iou, 1) <= 0.5, [batch_size, -1, 1]),
      iou.dtype)
  iou_suppressed = tf.reshape(
      tf.cast(tf.reduce_max(can_suppress_others * iou, 1) <= 0.5, iou.dtype),
      [batch_size, -1, 1]) * iou
  iou_sum_new = tf.reduce_sum(iou_suppressed, [1, 2])
  return [
      iou_suppressed,
      tf.reduce_any(iou_sum - iou_sum_new > 0.5),
      iou_sum_new
  ]
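# A hedged sketch of how a loop body like `_self_suppression` is typically
# driven (the surrounding while_loop is not shown in the source): it is
# iterated until the total IOU mass stops changing, zeroing out rows of boxes
# that are themselves suppressed. The random `iou` tensor is made up.
import tensorflow as tf

iou = tf.random.uniform([2, 8, 8])  # hypothetical [batch, boxes, boxes] IOUs
iou, _, _ = tf.while_loop(
    lambda _iou, keep_going, _iou_sum: keep_going,
    _self_suppression,
    [iou, tf.constant(True), tf.reduce_sum(iou, [1, 2])])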
def _build_target_q_op(self): """Build an op used as a target for the Q-value. Returns: target_q_op: An op calculating the Q-value. """ # Get the maximum Q-value across the actions dimension. replay_next_qt_max = tf.reduce_max( self._replay_next_target_net_outputs.q_values, 1) # Calculate the Bellman target value. # Q_t = R_t + \gamma^N * Q'_t+1 # where, # Q'_t+1 = \argmax_a Q(S_t+1, a) # (or) 0 if S_t is a terminal state, # and # N is the update horizon (by default, N=1). # Here, R_t is augmented for SAIL: # R_t = R_t + \f_{nonlin}(G_t - max_a Q_target(s_t, a)), # where # G_t is the discounted return-to-go. replay_target_q = tf.reduce_max( self._replay_target_net_outputs.q_values, axis=1, name='replay_max_target_q') # Get the comparison value. # By default, this will be G_t. # If using advantage learning, this will be Q_target(s_t, a_t). replay_action_one_hot = tf.one_hot(self._replay.actions, self.num_actions, 1., 0., name='action_one_hot') replay_target_q_al = tf.reduce_sum( self._replay_target_net_outputs.q_values * replay_action_one_hot, axis=1, name='replay_chosen_target_q_sil_al') comp_value = tf.math.maximum(replay_target_q_al, self._replay.returns) if self._clip > 0.: sail_bonus = self._alpha * tf.clip_by_value( tf.nn.relu(comp_value - replay_target_q), 0., self._clip) else: sail_bonus = self._alpha * tf.nn.relu(comp_value - replay_target_q) rewards = self._replay.rewards + sail_bonus update_target = rewards + self.cumulative_gamma * replay_next_qt_max * ( 1. - tf.cast(self._replay.terminals, tf.float32)) return (sail_bonus, update_target)
def _build_target_q_op(self): """Build an op used as a target for the Q-value. Returns: target_q_op: An op calculating the Q-value. """ # Get the maximum Q-value across the actions dimension. replay_next_qt_max = tf.reduce_max( self._replay_next_target_net_outputs.q_values, 1) # Calculate the Bellman target value. # Q_t = R_t + \gamma^N * Q'_t+1 # where, # Q'_t+1 = \argmax_a Q(S_t+1, a) # (or) 0 if S_t is a terminal state, # and # N is the update horizon (by default, N=1). # Here, R_t is augmented for AL: # R_t = R_t + Q_target(s_t, a_t) - max_a Q_target(s_t, a), replay_target_q = tf.reduce_max( self._replay_target_net_outputs.q_values, axis=1, name='replay_max_target_q') replay_action_one_hot = tf.one_hot(self._replay.actions, self.num_actions, 1., 0., name='action_one_hot') replay_target_q_al = tf.reduce_sum( self._replay_target_net_outputs.q_values * replay_action_one_hot, axis=1, name='replay_chosen_target_q_al') if self._clip > 0.: al_bonus = self._alpha * tf.clip_by_value( tf.nn.relu(replay_target_q_al - replay_target_q), 0., self._clip) else: al_bonus = self._alpha * tf.nn.relu(replay_target_q_al - replay_target_q) rewards = self._replay.rewards + al_bonus update_target = rewards + self.cumulative_gamma * replay_next_qt_max * ( 1. - tf.cast(self._replay.terminals, tf.float32)) return (al_bonus, update_target)
def manual_stepping(global_step, boundaries, rates):
  boundaries = [0] + boundaries
  num_boundaries = len(boundaries)
  rate_index = tf.reduce_max(
      tf.where(tf.greater_equal(global_step, boundaries),
               list(range(num_boundaries)),
               [0] * num_boundaries))
  return tf.reduce_sum(rates * tf.one_hot(rate_index, depth=num_boundaries))
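# A hedged usage sketch: piecewise-constant stepping of a scalar (e.g. a
# learning rate). With boundaries=[2, 5] and rates=[0.1, 0.01, 0.001] (made-up
# values), steps 0-1 yield 0.1, steps 2-4 yield 0.01, and steps 5+ yield 0.001.
import tensorflow as tf

for step in [0, 1, 2, 4, 5, 9]:
  lr = manual_stepping(tf.constant(step), boundaries=[2, 5],
                       rates=[0.1, 0.01, 0.001])
  print(step, float(lr))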
def test_normal_predictive_agreement_analytic_vs_sampling_approx(self):
  """Check that analytic CRPS and sample approximation CRPS agree."""
  tf.random.set_seed(1)
  nsamples = 100
  npredictive_samples = 10000

  labels = tf.random.normal((nsamples,))
  predictive_samples = tf.random.normal((nsamples, npredictive_samples))
  crps_sample = regression.crps_score(
      labels=labels, predictive_samples=predictive_samples)

  means = tf.zeros_like(labels)
  stddevs = tf.ones_like(labels)
  crps_analytic = regression.crps_normal_score(
      labels=labels, means=means, stddevs=stddevs)

  max_diff = tf.reduce_max(tf.abs(crps_sample - crps_analytic))
  max_diff = float(max_diff)
  # CRPS is at most 1, so tolerance is an upper bound to 5*SEM
  tolerance = 5.0 / math.sqrt(npredictive_samples)
  logging.info("Maximum difference %.4f, allowed tolerance %.4f",
               max_diff, tolerance)
  self.assertLessEqual(max_diff, tolerance,
                       msg="Sample-CRPS differs from analytic-CRPS "
                       "by %.4f > %.4f" % (max_diff, tolerance))
def _training_step(self, transitions: Sequence[tf.Tensor]) -> tf.Tensor:
  """Does a step of SGD on a batch of transitions."""
  o_tm1, a_tm1, r_t, d_t, o_t = transitions
  r_t = tf.cast(r_t, tf.float32)  # [B]
  d_t = tf.cast(d_t, tf.float32)  # [B]

  with tf.GradientTape() as tape:
    q_tm1 = self._online_network(o_tm1)  # [B, A]
    q_t = self._target_network(o_t)  # [B, A]

    onehot_actions = tf.one_hot(a_tm1, depth=self._num_actions)  # [B, A]
    qa_tm1 = tf.reduce_sum(q_tm1 * onehot_actions, axis=-1)  # [B]
    qa_t = tf.reduce_max(q_t, axis=-1)  # [B]

    # One-step Q-learning loss.
    target = r_t + d_t * self._discount * qa_t
    td_error = qa_tm1 - target
    loss = 0.5 * tf.reduce_sum(td_error**2)  # []

  # Update the online network via SGD.
  variables = self._online_network.trainable_variables
  gradients = tape.gradient(loss, variables)
  self._optimizer.apply(gradients, variables)

  # Periodically copy online -> target network variables.
  if self._total_steps % self._target_update_period == 0:
    for target, param in zip(self._target_network.trainable_variables,
                             self._online_network.trainable_variables):
      target.assign(param)
  return loss
def cond(m, pchol, perm, matrix_diag):
  """Condition for `tf.while_loop` continuation."""
  del pchol
  del perm
  error = tf.linalg.norm(matrix_diag, ord=1, axis=-1)
  max_err = tf.reduce_max(error / orig_error)
  return (m < max_rank) & (tf.equal(m, 0) | (max_err > diag_rtol))
def _entropy(self):
  if self._logits is None:
    # If we only have probs, there's not much we can do to ensure numerical
    # precision.
    probs = tf.convert_to_tensor(self._probs)
    return -tf.reduce_sum(
        tf.math.multiply_no_nan(tf.math.log(probs), probs), axis=-1)

  # The following result can be derived as follows. Write log(p[i]) as:
  # s[i]-m-lse(s[i]-m) where m=max(s), then you have:
  #   sum_i exp(s[i]-m-lse(s-m)) (s[i] - m - lse(s-m))
  #   = -m - lse(s-m) + sum_i s[i] exp(s[i]-m-lse(s-m))
  #   = -m - lse(s-m) + (1/exp(lse(s-m))) sum_i s[i] exp(s[i]-m)
  #   = -m - lse(s-m) + (1/sumexp(s-m)) sum_i s[i] exp(s[i]-m)
  # Write x[i]=s[i]-m then you have:
  #   = -m - lse(x) + (1/sum_exp(x)) sum_i s[i] exp(x[i])
  # Negating all of this result is the Shannon (discrete) entropy.
  logits = tf.convert_to_tensor(self._logits)
  m = tf.reduce_max(logits, axis=-1, keepdims=True)
  x = logits - m
  lse_logits = m[..., 0] + tf.reduce_logsumexp(x, axis=-1)
  sum_exp_x = tf.reduce_sum(tf.math.exp(x), axis=-1)
  return lse_logits - tf.reduce_sum(
      tf.math.multiply_no_nan(logits, tf.math.exp(x)), axis=-1) / sum_exp_x
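# A hedged verification sketch (standalone, outside the distribution class):
# the max-shifted identity above reduces to H = lse(s) - sum_i p[i]*s[i], which
# should match the naive -sum_i p[i]*log(p[i]) on well-scaled logits while
# staying finite where the naive route would overflow. Values are made up.
import tensorflow as tf

def stable_entropy(logits):
  m = tf.reduce_max(logits, axis=-1, keepdims=True)
  x = logits - m
  lse_logits = m[..., 0] + tf.reduce_logsumexp(x, axis=-1)
  sum_exp_x = tf.reduce_sum(tf.math.exp(x), axis=-1)
  return lse_logits - tf.reduce_sum(logits * tf.math.exp(x),
                                    axis=-1) / sum_exp_x

logits = tf.constant([[0.5, 1.5, -2.0]])
probs = tf.nn.softmax(logits)
naive = -tf.reduce_sum(probs * tf.math.log(probs), axis=-1)
print(stable_entropy(logits).numpy(), naive.numpy())  # both ~ [0.67]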
def exported_function(x):
  root.x = constant_op.constant([[37.0, -23.0], [1.0, 4.0]])
  root.y = tf.matmul(root.x, root.w)
  tf.compat.v1.Print(root.x, [root.x])
  tf.compat.v1.Assert(tf.greater(tf.reduce_max(root.x), 0), [root.x])
  tf.compat.v1.check_numerics(root.x, 'NaN found')
  return root.y * x
def step(per_replica_inputs: _TensorDict) -> None:
  """The function defining a single validation/test step."""
  features = per_replica_inputs['features']
  labels = per_replica_inputs[label_key]
  logits = model(features, training=False)
  if isinstance(logits, (tuple, list)):
    logits, covmat = logits
  else:
    per_core_batch_size, _ = logits.get_shape().as_list()
    covmat = tf.eye(per_core_batch_size)
  logits = ed.layers.utils.mean_field_logits(
      logits, covmat, mean_field_factor=mean_field_factor)
  predictions = tf.nn.softmax(logits, axis=-1)
  if label_key != 'labels':
    predictions = tf.reduce_max(predictions, axis=-1)
  # Later when metric.result() is called, it will return the computed
  # result, averaged across replicas.
  for metric in metrics.values():
    if isinstance(metric, tf.keras.metrics.Metric):
      metric.update_state(labels, predictions)  # pytype: disable=attribute-error
    else:
      metric.add_batch(predictions, label=labels)
  return
def initial_value_of_masked_time_series(time_series_tensor, broadcast_mask):
  """Get the first unmasked entry of each time series in the batch.

  Args:
    time_series_tensor: float `Tensor` of shape [..., num_timesteps].
    broadcast_mask: bool `Tensor` of same shape as `time_series`.
  """
  num_timesteps = tf.shape(time_series_tensor)[-1]

  # Compute the index of the first unmasked entry for each series in the batch.
  unmasked_negindices = (
      tf.cast(~broadcast_mask, tf.int32) * tf.range(num_timesteps, 0, -1))
  first_unmasked_indices = num_timesteps - tf.reduce_max(
      unmasked_negindices, axis=-1)

  if first_unmasked_indices.shape.ndims is None:
    raise NotImplementedError(
        'Cannot compute initial values of a masked time series with '
        'dynamic rank.')  # `batch_gather` requires static rank

  # Extract the initial value for each series in the batch.
  return tf.squeeze(tf.gather(
      params=time_series_tensor,
      indices=first_unmasked_indices[..., tf.newaxis],
      batch_dims=first_unmasked_indices.shape.ndims), axis=-1)
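# A hedged sketch of the index trick above on concrete made-up values:
# multiplying the unmasked indicator by a descending ramp makes the FIRST
# unmasked position carry the largest value, so reduce_max recovers it.
import tensorflow as tf

series = tf.constant([[9.9, 1.0, 2.0, 3.0]])
mask = tf.constant([[True, True, False, False]])  # first two entries masked
num_timesteps = tf.shape(series)[-1]
negindices = tf.cast(~mask, tf.int32) * tf.range(num_timesteps, 0, -1)
# negindices == [[0, 0, 2, 1]]; first unmasked index = 4 - 2 = 2.
first_idx = num_timesteps - tf.reduce_max(negindices, axis=-1)
print(first_idx.numpy())  # [2] -> initial value series[..., 2] == 2.0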
def activations_to_f0_and_confidence(cls, activations, centers=None):
  """Convert network outputs (activations) to f0 predictions."""
  cent_mapping = tf.cast(
      tf.linspace(0, 7180, 360) + 1997.3794084376191, tf.float32)

  # The confidence of voicing activity and the argmax bin.
  confidence = tf.reduce_max(activations, axis=-1, keepdims=True)
  if centers is None:
    centers = tf.math.argmax(activations, axis=-1)
  centers = tf.cast(centers, tf.int32)

  # Slice the local neighborhood around the argmax bin.
  start = centers - 4
  idx_list = tf.range(0, 10)
  idx_list = start[:, None] + idx_list[None, :]

  # Bound to [0, 359].
  idx_list = tf.where(idx_list > 0, idx_list, 0)
  idx_list = tf.where(idx_list < 359, idx_list, 359)

  # Gather and weight activations.
  weights = tf.gather(activations, idx_list, batch_dims=1)
  cents = tf.gather(cent_mapping, idx_list, batch_dims=0)
  f0_cent = tf.reduce_sum(weights * cents, axis=-1) / tf.reduce_sum(
      weights, axis=-1)
  f0_hz = 10 * 2**(f0_cent / 1200.)
  return f0_hz, confidence
def tf_hilbert(x, axis=-1):
  '''Performs 1d hilbert similar to scipy'''
  # Change axes to be most inner for fft
  axis = tf.constant(axis)
  if axis < 0:
    axis = tf.rank(x) + axis
  axes = tf.range(tf.rank(x))
  axes = tf.math.mod(axes - tf.reduce_max(axes) + axis, tf.size(axes))
  x = tf.transpose(x, perm=axes)

  # Apply fft
  x = tf.cast(x, dtype=tf.complex64)
  Xf = tf.signal.fft(x)

  # Create 2U
  N = tf.shape(Xf)[-1]
  h = tf.cast(tf.ones([N // 2 + 1]) * 2, Xf.dtype)
  if tf.math.mod(N, 2) == 0:
    h = tf.tensor_scatter_nd_update(h, [[0], [tf.size(h) - 1]], [1, 1])
  else:
    h = tf.tensor_scatter_nd_update(h, [[0]], [1])
  h = tf.concat([h, tf.zeros(N - tf.size(h), dtype=h.dtype)], axis=0)

  # Apply ifft and hilbert
  x = tf.signal.ifft(Xf * h)

  # Change axes back
  x = tf.transpose(x, perm=tf.argsort(axes))
  return x
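# A hedged usage sketch: like scipy.signal.hilbert, `tf_hilbert` returns the
# analytic signal, so its imaginary part is the Hilbert transform and its
# magnitude is the envelope. scipy is used here only as a reference; the
# test signal below is made up.
import numpy as np
import tensorflow as tf
from scipy.signal import hilbert

t = np.linspace(0.0, 1.0, 400, endpoint=False).astype(np.float32)
signal = np.sin(2 * np.pi * 5 * t)
analytic_tf = tf_hilbert(tf.constant(signal)).numpy()
analytic_scipy = hilbert(signal)
print(np.max(np.abs(analytic_tf - analytic_scipy)))  # expected to be small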
def _entropy(self):
  if self._logits is None:
    # If we only have probs, there's not much we can do to ensure numerical
    # precision.
    probs = tf.convert_to_tensor(self._probs)
    return -tf.reduce_sum(
        tf.math.multiply_no_nan(tf.math.log(probs), probs), axis=-1)

  # The following result can be derived as follows. Let s[i] be a logit.
  # The entropy is:
  #   H = -sum_i(p[i] * log(p[i]))
  #     = -sum_i(p[i] * (s[i] - logsumexp(s)))
  #     = logsumexp(s) - sum_i(p[i] * s[i])
  logits = tf.convert_to_tensor(self._logits)
  logits = logits - tf.reduce_max(logits, axis=-1, keepdims=True)
  lse_logits = tf.reduce_logsumexp(logits, axis=-1)
  # TODO(b/161014180): Workaround to support correct gradient calculations
  # with -inf logits.
  masked_logits = tf.where(
      (tf.math.is_inf(logits) & (logits < 0)),
      tf.cast(1.0, dtype=logits.dtype), logits)
  return lse_logits - tf.reduce_sum(
      tf.math.multiply_no_nan(masked_logits, tf.math.exp(logits)),
      axis=-1) / tf.math.exp(lse_logits)
def _build_target_quantile_values_op(self):
  """Build an op used as a target for return values at given quantiles.

  Returns:
    An op calculating the target quantile return.
  """
  batch_size = tf.shape(self._replay.rewards)[0]

  # Calculate AL modified rewards.
  replay_action_one_hot = tf.one_hot(
      self._replay.actions, self.num_actions, 1., 0., name='action_one_hot')
  replay_target_q = tf.reduce_max(
      self._replay_target_q_values, axis=1, name='replay_chosen_target_q')
  replay_target_q_al = tf.reduce_sum(
      replay_action_one_hot * self._replay_target_q_values,
      axis=1,
      name='replay_chosen_target_q_al')

  if self._clip > 0.:
    al_bonus = self._alpha * tf.clip_by_value(
        (replay_target_q_al - replay_target_q), -self._clip, self._clip)
  else:
    al_bonus = self._alpha * (replay_target_q_al - replay_target_q)

  # Shape of rewards: (num_tau_prime_samples x batch_size) x 1.
  rewards = (self._replay.rewards + al_bonus)[:, None]
  rewards = tf.tile(rewards, [self.num_tau_prime_samples, 1])

  is_terminal_multiplier = 1. - tf.cast(self._replay.terminals, tf.float32)
  # Incorporate terminal state to discount factor.
  # size of gamma_with_terminal: (num_tau_prime_samples x batch_size) x 1.
  gamma_with_terminal = self.cumulative_gamma * is_terminal_multiplier
  gamma_with_terminal = tf.tile(gamma_with_terminal[:, None],
                                [self.num_tau_prime_samples, 1])

  # Get the indices of the maximum Q-value across the action dimension.
  # Shape of replay_next_qt_argmax: (num_tau_prime_samples x batch_size) x 1.
  replay_next_qt_argmax = tf.tile(
      self._replay_next_qt_argmax[:, None], [self.num_tau_prime_samples, 1])

  # Shape of batch_indices: (num_tau_prime_samples x batch_size) x 1.
  batch_indices = tf.cast(tf.range(
      self.num_tau_prime_samples * batch_size)[:, None], tf.int64)

  # Shape of batch_indexed_target_values:
  # (num_tau_prime_samples x batch_size) x 2.
  batch_indexed_target_values = tf.concat(
      [batch_indices, replay_next_qt_argmax], axis=1)

  # Shape of next_target_values: (num_tau_prime_samples x batch_size) x 1.
  target_quantile_values = tf.gather_nd(
      self._replay_net_target_quantile_values,
      batch_indexed_target_values)[:, None]

  return rewards + gamma_with_terminal * target_quantile_values
def pad_tensors(tensors, dtype=None, name=None):
  """Pads the innermost dimension of `Tensor`s to a common shape.

  Given a list of `Tensor`s of the same `dtype` and with shapes
  `batch_shape_i + [n_i]`, pads the innermost dimension of each tensor to
  `batch_shape_i + [max(n_i)]`. For each tensor `t`, the padding is done with
  values `t[..., -1]`.

  ### Example
  ```python
  x = [[1, 2, 3, 9], [2, 3, 5, 2]]
  y = [4, 5, 8]
  pad_tensors([x, y])
  # Expected: [array([[1, 2, 3, 9], [2, 3, 5, 2]]), array([4, 5, 8, 8])]
  ```

  Args:
    tensors: A list of tensors of the same `dtype` and shapes
      `batch_shape_i + [n_i]`.
    dtype: The default dtype to use when converting values to `Tensor`s.
      Default value: `None` which means that default dtypes inferred by
        TensorFlow are used.
    name: Python string. The name to give to the ops created by this class.
      Default value: `None` which maps to the default name `pad_tensors`.

  Returns:
    A list of `Tensor`s of shape `batch_shape_i + [max(n_i)]`.

  Raises:
    ValueError: If input is not an instance of a list or a tuple.
  """
  if not isinstance(tensors, (tuple, list)):
    raise ValueError(
        f"`tensors` should be a list or a tuple but have type {type(tensors)}")
  if not tensors:
    return []
  name = name or "pad_tensors"
  with tf.name_scope(name):
    t0 = tf.convert_to_tensor(tensors[0], dtype=dtype)
    dtype = dtype or t0.dtype
    tensors = [t0] + [tf.convert_to_tensor(t, dtype=dtype)
                      for t in tensors[1:]]
    max_size = tf.reduce_max([tf.shape(t)[-1] for t in tensors])
    padded_tensors = []
    for t in tensors:
      paddings = ((t.shape.rank - 1) * [[0, 0]]
                  + [[0, max_size - tf.shape(t)[-1]]])
      # Padded value has to be a constant
      constant_values = tf.reduce_min(t) - 1
      pad_t = tf.pad(t, paddings, mode="CONSTANT",
                     constant_values=constant_values)
      # Correct padded value
      pad_t = tf.where(pad_t > constant_values,
                       pad_t,
                       tf.expand_dims(t[..., -1], axis=-1))
      padded_tensors.append(pad_t)
    return padded_tensors
def _entropy(self):
  if self._logits is None:
    # If we only have probs, there's not much we can do to ensure numerical
    # precision.
    probs = tf.convert_to_tensor(self._probs)
    return -tf.reduce_sum(
        tf.math.multiply_no_nan(tf.math.log(probs), probs), axis=-1)

  # The following result can be derived as follows. Write log(p[i]) as:
  # s[i]-m-lse(s[i]-m) where m=max(s), then you have:
  #   sum_i exp(s[i]-m-lse(s-m)) (s[i] - m - lse(s-m))
  #   = -m - lse(s-m) + sum_i s[i] exp(s[i]-m-lse(s-m))
  #   = -m - lse(s-m) + (1/exp(lse(s-m))) sum_i s[i] exp(s[i]-m)
  #   = -m - lse(s-m) + (1/sumexp(s-m)) sum_i s[i] exp(s[i]-m)
  # Write x[i]=s[i]-m then you have:
  #   = -m - lse(x) + (1/sum_exp(x)) sum_i s[i] exp(x[i])
  # Negating all of this result is the Shannon (discrete) entropy.
  logits = tf.convert_to_tensor(self._logits)
  m = tf.reduce_max(logits, axis=-1, keepdims=True)
  x = logits - m
  sum_exp_x = tf.reduce_sum(tf.math.exp(x), axis=-1)
  lse_logits = m[..., 0] + tf.math.log(sum_exp_x)
  # TODO(b/161014180): Workaround to support correct gradient calculations
  # with -inf logits.
  is_inf_logits = tf.cast(tf.math.is_inf(logits), dtype=tf.float32)
  is_negative_logits = tf.cast(logits < 0, dtype=tf.float32)
  masked_logits = tf.where(
      tf.cast(is_inf_logits * is_negative_logits, dtype=bool),
      tf.cast(1.0, dtype=logits.dtype), logits)
  return lse_logits - tf.reduce_sum(
      tf.math.multiply_no_nan(masked_logits, tf.math.exp(x)),
      axis=-1) / sum_exp_x
def _step(self, transitions: Sequence[tf.Tensor]):
  """Does a step of SGD for the whole ensemble over `transitions`."""
  o_tm1, a_tm1, r_t, d_t, o_t, m_t, z_t = transitions
  variables = tree.flatten(
      [model.trainable_variables for model in self._ensemble])
  with tf.GradientTape() as tape:
    losses = []
    for k in range(self._num_ensemble):
      net = self._ensemble[k]
      target_net = self._target_ensemble[k]

      # Q-learning loss with added reward noise + half-in bootstrap.
      q_values = net(o_tm1)
      one_hot_actions = tf.one_hot(a_tm1, depth=self._num_actions)
      train_value = tf.reduce_sum(q_values * one_hot_actions, axis=-1)
      target_value = tf.stop_gradient(
          tf.reduce_max(target_net(o_t), axis=-1))
      target_y = r_t + z_t[:, k] + self._discount * d_t * target_value
      loss = tf.square(train_value - target_y) * m_t[:, k]
      losses.append(loss)

    loss = tf.reduce_mean(tf.stack(losses))
    gradients = tape.gradient(loss, variables)
  self._total_steps.assign_add(1)
  self._optimizer.apply(gradients, variables)

  # Periodically update the target network.
  if tf.math.mod(self._total_steps, self._target_update_period) == 0:
    for k in range(self._num_ensemble):
      for src, dest in zip(self._ensemble[k].variables,
                           self._target_ensemble[k].variables):
        dest.assign(src)
def sum_average_transformed_mu_and_sigma(mu, log_sigma_sq):
  """Computes <mu>, var(mu) + <var> in transformed representation.

  This corresponds to assuming that the output distribution is a sum of
  Gaussians and computing the mean and variance of the resulting
  (non-Gaussian) distribution.

  Args:
    mu: Tensor of shape [B, ...] representing the means of the input
      distributions.
    log_sigma_sq: Tensor of shape [B, ...] representing log(sigma**2) of the
      input distributions. Can be None, in which case the variance is assumed
      to be zero.

  Returns:
    mu: Tensor of shape [...] representing the means of the output
      distributions.
    log_sigma_sq: Tensor of shape [...] representing log(sigma**2) of the
      output distributions.
  """
  av_mu = tf.reduce_mean(mu, axis=0)
  var_mu = tf.math.reduce_std(mu, axis=0)**2
  if log_sigma_sq is None:
    return av_mu, tf.math.log(var_mu)

  max_log_sigma_sq = tf.reduce_max(log_sigma_sq, axis=0)
  log_sigma_sq -= max_log_sigma_sq
  # (sigma/sigma_0)**2
  sigma_sq = tf.math.exp(log_sigma_sq)
  # (<sigma**2>)/sigma_0**2 (<1)
  av_sigma_sq = tf.reduce_mean(sigma_sq, axis=0)
  # (<sigma**2> + var(mu))/sigma_0**2
  av_sigma_sq += var_mu * tf.math.exp(-max_log_sigma_sq)
  # log(<sigma**2> + var(mu))
  log_av_sigma_sq = tf.math.log(av_sigma_sq) + max_log_sigma_sq
  return av_mu, log_av_sigma_sq
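# A hedged verification sketch: the max-subtraction above is the usual
# log-domain stabilization, so on moderate made-up values it should agree with
# the direct computation log(mean(exp(log_sigma_sq)) + var(mu)); for very
# negative log-variances the direct route underflows while this one does not.
import tensorflow as tf

mu = tf.constant([[0.0], [1.0], [2.0]])
log_sigma_sq = tf.constant([[-1.0], [-2.0], [-3.0]])
_, log_var = sum_average_transformed_mu_and_sigma(mu, log_sigma_sq)
direct = tf.math.log(
    tf.reduce_mean(tf.math.exp(log_sigma_sq), axis=0)
    + tf.math.reduce_std(mu, axis=0)**2)
print(log_var.numpy(), direct.numpy())  # expected to agree closely (~ -0.16)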
def binary_image_to_points(features, normalize_coords=True, keys=("image",)):
  """Converts a (binary) image into a 2D point cloud.

  Args:
    features: Dictionary of data features to preprocess.
    normalize_coords: Normalize coords to be in [0,1] by preserving the
      aspect ratio.
    keys: On which keys to apply this function.

  Returns:
    Features with the image as a point cloud.
  """
  for key in keys:
    image = features[key]  # [HxW] or [HxWx1]
    image = tf.reshape(image, [image.shape[0], image.shape[1], 1])
    # We map background pixels to the origin, which may be suboptimal
    # but saves us some engineering work.
    coords = tf.cast(
        tf.stack(
            tf.meshgrid(tf.range(image.shape[0]), tf.range(image.shape[1]),
                        indexing="ij"),
            axis=-1), tf.float32)
    if normalize_coords:
      coords /= tf.cast(tf.reduce_max(image.shape[:2]), tf.float32)
    mask = tf.tile(image > 0, [1, 1, 2])
    features[key] = tf.reshape(tf.cast(mask, tf.float32) * coords, [-1, 2])
  return features
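# A hedged usage sketch on a tiny made-up 2x3 "image": foreground pixels keep
# their (row, col) coordinates, normalized by the longer side (3), while
# background pixels collapse to the origin as described in the comment above.
import tensorflow as tf

features = {"image": tf.constant([[0, 1, 0],
                                  [0, 0, 1]], dtype=tf.float32)}
points = binary_image_to_points(features)["image"]
print(points.numpy())  # 6 points; nonzero rows at (0,1)/3 and (1,2)/3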
def visualize(pyr, percentile=99.):
  """Visualizes a wavelet decomposition produced by construct().

  Args:
    pyr: A wavelet decomposition produced by construct(),
    percentile: The percentile of the deviation for each (non-residual)
      wavelet band to be clamped by before normalization. Setting this to 100
      causes visualization to clamp to the maximum deviation, which preserves
      the entire dynamic range but may make subtle details hard to see. A
      value of 99 (the default) will clip away the 1% largest-magnitude
      values in each band.

  Returns:
    An image (a TF tensor of uint8's) of shape (width, height, num_channels).
    Note that the input wavelet decomposition was produced from an image of
    shape (num_channels, width, height) --- this function permutes the
    ordering to what is expected in a planar image.
  """
  vis_pyr = []
  for d in range(len(pyr) - 1):
    vis_band = []
    for b in range(3):
      band = pyr[d][b]
      max_mag = tfp.stats.percentile(tf.abs(band), percentile)
      vis_band.append(0.5 * (1. + tf.clip_by_value(band / max_mag, -1., 1.)))
    vis_pyr.append(vis_band)
  d = len(pyr) - 1
  resid = pyr[d]
  resid_norm = (resid - tf.reduce_min(resid)) / (
      tf.reduce_max(resid) - tf.reduce_min(resid))
  vis_pyr.append(resid_norm)
  vis = tf.cast(
      tf.math.round(255. * tf.transpose(flatten(vis_pyr), perm=[1, 2, 0])),
      tf.uint8)
  return vis
def _check_convergence(simplex, best_vertex, best_objective, worst_objective,
                       func_tolerance, position_tolerance):
  """Returns True if the simplex has converged.

  If the simplex size is smaller than the `position_tolerance`, or the
  variation of the function value over the vertices of the simplex is smaller
  than the `func_tolerance`, returns True; otherwise False.

  Args:
    simplex: `Tensor` of real dtype. The simplex to test for convergence. For
      more details, see the docstring for `initial_simplex` argument of
      `minimize`.
    best_vertex: `Tensor` of real dtype and rank one less than `simplex`. The
      vertex with the best (i.e. smallest) objective value.
    best_objective: Scalar `Tensor` of real dtype. The best (i.e. smallest)
      value of the objective function at a vertex.
    worst_objective: Scalar `Tensor` of same dtype as `best_objective`. The
      worst (i.e. largest) value of the objective function at a vertex.
    func_tolerance: Scalar positive `Tensor`. The tolerance for the variation
      of the objective function value over the simplex. If the variation over
      the simplex vertices is below this threshold, convergence is True.
    position_tolerance: Scalar positive `Tensor`. The algorithm stops if the
      lengths (under the supremum norm) of edges connecting to the best vertex
      are below this threshold.

  Returns:
    has_converged: A scalar boolean `Tensor` indicating whether the algorithm
      is deemed to have converged.
  """
  objective_convergence = (
      tf.abs(worst_objective - best_objective) < func_tolerance)
  simplex_degeneracy = (
      tf.reduce_max(tf.abs(simplex - best_vertex)) < position_tolerance)
  return objective_convergence | simplex_degeneracy