def _scale_mle(self, samples, scale_candidates):
  """Max log-likelihood estimate for scale.

  Args:
    samples: Observed data points.
    scale_candidates: A simple grid of candidates for scale, with shape
      original_batch_shape + [num_candidates], where different candidates for
      a single scalar parameter are at the innermost dimension (axis -1).

  Returns:
    scale_mle: max log-likelihood estimate for scale.
  """
  dist = tfd.Horseshoe(scale=scale_candidates)
  dims = tf.shape(scale_candidates)
  num_candidates = dims[-1]
  original_batch_shape = dims[:-1]
  # log_likelihood has same shape as scale_candidates,
  # i.e. original_batch_shape + [num_candidates].
  log_likelihood = tf.reduce_sum(
      # dist.log_prob here returns a tensor with shape
      # [num_samples] + original_batch_shape + [num_candidates].
      dist.log_prob(
          tf.reshape(samples,
                     tf.concat([[-1], original_batch_shape, [1]], axis=0))),
      axis=0)
  # Max log-likelihood candidate location mask.
  mask = tf.one_hot(tf.argmax(log_likelihood, axis=-1),
                    depth=num_candidates,
                    dtype=self.dtype)
  return tf.reduce_sum(scale_candidates * mask, axis=-1)
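# A minimal sketch of the candidate-grid MLE pattern used above, assuming
# TensorFlow Probability is importable; the sample size, seed, and grid values
# are illustrative only.
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions
samples = tfd.Horseshoe(scale=0.7).sample(1000, seed=42)   # shape [1000]
candidates = tf.constant([0.1, 0.5, 0.7, 1.0, 2.0])        # grid along axis -1
# Broadcast samples against the candidate batch: log_prob has shape [1000, 5].
log_liks = tf.reduce_sum(
    tfd.Horseshoe(scale=candidates).log_prob(samples[:, tf.newaxis]), axis=0)
best_scale = candidates[tf.argmax(log_liks)]               # MLE over the grid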
def accuracy(y_true, y_pred):
  """Accuracy."""
  del y_pred  # unused arg
  # Reads logits from the enclosing `model` and returns per-example
  # correctness as booleans for the metric framework to average.
  y_true = tf.squeeze(y_true)
  return tf.equal(
      tf.argmax(input=model.output.distribution.logits, axis=1),
      tf.cast(y_true, tf.int64))
def predict_single_comment(self, token_seq: List[int]):
  """Predict the most likely token at each position of a single sequence."""
  self.hyperparameters["batch_size"] = 1
  output_logits = self.compute_logits(
      np.array([token_seq], dtype=np.int32), training=False)
  next_tok_logits = output_logits[0, :, :]
  next_tok_ids = tf.argmax(next_tok_logits, axis=1).numpy()
  return next_tok_ids
def body(m, pchol, perm, matrix_diag):
  """Body of a single `tf.while_loop` iteration."""
  # Here is roughly a numpy, non-batched version of what's going to happen.
  # (See also Algorithm 1 of Harbrecht et al.)
  # 1: maxi = np.argmax(matrix_diag[perm[m:]]) + m
  # 2: maxval = matrix_diag[perm][maxi]
  # 3: perm[m], perm[maxi] = perm[maxi], perm[m]
  # 4: row = matrix[perm[m]][perm[m + 1:]]
  # 5: row -= np.sum(pchol[:m][perm[m + 1:]] * pchol[:m][perm[m]], axis=-2)
  # 6: pivot = np.sqrt(maxval); row /= pivot
  # 7: row = np.concatenate([[[pivot]], row], -1)
  # 8: matrix_diag[perm[m:]] -= row**2
  # 9: pchol[m, perm[m:]] = row

  # Find the maximal position of the (remaining) permuted diagonal.
  # Steps 1, 2 above.
  permuted_diag = batch_gather(matrix_diag, perm[..., m:])
  maxi = tf.argmax(
      permuted_diag, axis=-1, output_type=tf.int64)[..., tf.newaxis]
  maxval = batch_gather(permuted_diag, maxi)
  maxi = maxi + m
  maxval = maxval[..., 0]

  # Update perm: Swap perm[..., m] with perm[..., maxi]. Step 3 above.
  perm = _swap_m_with_i(perm, m, maxi)

  # Step 4.
  row = batch_gather(matrix, perm[..., m:m + 1], axis=-2)
  row = batch_gather(row, perm[..., m + 1:])

  # Step 5.
  prev_rows = pchol[..., :m, :]
  prev_rows_perm_m_onward = batch_gather(prev_rows, perm[..., m + 1:])
  prev_rows_pivot_col = batch_gather(prev_rows, perm[..., m:m + 1])
  row -= tf.reduce_sum(
      input_tensor=prev_rows_perm_m_onward * prev_rows_pivot_col,
      axis=-2)[..., tf.newaxis, :]

  # Step 6.
  pivot = tf.sqrt(maxval)[..., tf.newaxis, tf.newaxis]

  # Step 7.
  row = tf.concat([pivot, row / pivot], axis=-1)

  # TODO(b/130899118): Pad grad fails with int64 paddings.
  # Step 8.
  paddings = tf.concat([
      tf.zeros([prefer_static.rank(pchol) - 1, 2], dtype=tf.int32),
      [[tf.cast(m, tf.int32), 0]]
  ], axis=0)
  diag_update = tf.pad(tensor=row**2, paddings=paddings)[..., 0, :]
  reverse_perm = _invert_permutation(perm)
  matrix_diag -= batch_gather(diag_update, reverse_perm)

  # Step 9.
  row = tf.pad(tensor=row, paddings=paddings)
  # TODO(bjp): Defer the reverse permutation all-at-once at the end?
  row = batch_gather(row, reverse_perm)
  pchol_shape = pchol.shape
  pchol = tf.concat([pchol[..., :m, :], row, pchol[..., m + 1:, :]], axis=-2)
  tensorshape_util.set_shape(pchol, pchol_shape)
  return m + 1, pchol, perm, matrix_diag
def argmax(a, axis=None):
  a = array_creation.asarray(a)
  a = atleast_1d(a)
  if axis is None:
    # When axis is None numpy flattens the array.
    a_t = tf.reshape(a.data, [-1])
  else:
    a_t = a.data
  return utils.tensor_to_ndarray(tf.argmax(input=a_t, axis=axis))
def accuracy_function(real, pred):
  accuracies = tf.equal(real, tf.argmax(pred, axis=2))

  mask = tf.math.logical_not(tf.math.equal(real, 0))
  accuracies = tf.math.logical_and(mask, accuracies)

  accuracies = tf.cast(accuracies, dtype=tf.float32)
  mask = tf.cast(mask, dtype=tf.float32)
  return tf.reduce_sum(accuracies) / tf.reduce_sum(mask)
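# A minimal sketch exercising accuracy_function above (the data is
# illustrative): position 3 of the sequence holds padding (id 0), so it is
# excluded from the average.
import tensorflow as tf

real = tf.constant([[2, 5, 1, 0]], dtype=tf.int64)   # [batch, time]
pred = tf.one_hot([[2, 5, 3, 0]], depth=6)           # [batch, time, vocab]
acc = accuracy_function(real, pred)
print(acc.numpy())  # 2 correct out of 3 non-padding tokens -> ~0.6667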
def forward_step(previous_step_pair, log_prob_observation):
  log_prob_previous = previous_step_pair[0]
  log_prob = (log_prob_previous[..., tf.newaxis] +
              log_trans +
              log_prob_observation[..., tf.newaxis, :])
  most_likely_given_successor = tf.argmax(log_prob, axis=-2)
  max_log_p_given_successor = tf.reduce_max(log_prob, axis=-2)
  return (max_log_p_given_successor, most_likely_given_successor)
def one_hot_argmax(inputs, temperature, axis=-1):
  """Returns one-hot of argmax with backward pass set to softmax-temperature."""
  vocab_size = inputs.shape[-1]
  hard = tf.one_hot(tf.argmax(inputs, axis=axis),
                    depth=vocab_size,
                    axis=axis,
                    dtype=inputs.dtype)
  soft = tf.nn.softmax(inputs / temperature, axis=axis)
  outputs = soft + tf.stop_gradient(hard - soft)
  return outputs
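# A standalone check of the straight-through behavior above (values are
# illustrative, assuming eager mode): the forward value is a hard one-hot,
# while the gradient flows through the tempered softmax.
import tensorflow as tf

logits = tf.constant([[1.0, 3.0, 2.0]])
with tf.GradientTape() as tape:
  tape.watch(logits)
  out = one_hot_argmax(logits, temperature=0.5)
  loss = tf.reduce_sum(out * tf.constant([[0.0, 1.0, 0.0]]))
grad = tape.gradient(loss, logits)
print(out.numpy())   # [[0., 1., 0.]] -- hard one-hot in the forward pass
print(grad.numpy())  # nonzero everywhere: softmax gradient in the backward pass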
def test_unbatched_rank_one_raise(self):
  with self.assertRaises(ValueError):
    input_tensor = tf.constant([-0.6, -0.5, 0.5])
    dim = len(input_tensor)
    n = 10000000
    argmax = lambda t: tf.one_hot(tf.argmax(t, 1), dim)
    soft_argmax = perturbations.perturbed(argmax, sigma=0.5, num_samples=n)
    _ = soft_argmax(input_tensor)
def _compute_calibration_bin_statistics(num_bins,
                                        logits=None,
                                        labels_true=None,
                                        labels_predicted=None):
  """Compute binning statistics required for calibration measures.

  Args:
    num_bins: int, number of probability bins, e.g. 10.
    logits: Tensor, (n, nlabels), with logits for n instances and nlabels.
    labels_true: Tensor, (n,), with tf.int32 or tf.int64 elements containing
      ground truth class labels in the range [0, nlabels).
    labels_predicted: Tensor, (n,), with tf.int32 or tf.int64 elements
      containing decisions of the predictive system. If `None`, we will use
      the argmax decision using the `logits`.

  Returns:
    bz: Tensor, shape (2, num_bins), tf.int32, counts of incorrect (row 0) and
      correct (row 1) predictions in each of the `num_bins` probability bins.
    pmean_observed: Tensor, shape (num_bins,), tf.float32, the mean predictive
      probabilities in each probability bin.
  """
  if labels_predicted is None:
    # If no labels are provided, we take the label with the maximum probability
    # decision. This corresponds to the optimal expected minimum loss decision
    # under 0/1 loss.
    pred_y = tf.argmax(logits, axis=1, output_type=labels_true.dtype)
  else:
    pred_y = labels_predicted

  correct = tf.cast(tf.equal(pred_y, labels_true), tf.int32)

  # Collect predicted probabilities of decisions.
  pred = tf.nn.softmax(logits, axis=1)
  prob_y = tf.gather(pred, pred_y[:, tf.newaxis], batch_dims=1)  # p(pred_y | x)
  prob_y = tf.reshape(prob_y, (ps.size(prob_y),))

  # Compute b/z histogram statistics:
  # bz[0, bin] contains counts of incorrect predictions in the probability bin.
  # bz[1, bin] contains counts of correct predictions in the probability bin.
  bins = tf.histogram_fixed_width_bins(prob_y, [0.0, 1.0], nbins=num_bins)
  event_bin_counts = tf.math.bincount(correct * num_bins + bins,
                                      minlength=2 * num_bins,
                                      maxlength=2 * num_bins)
  event_bin_counts = tf.reshape(event_bin_counts, (2, num_bins))

  # Compute mean predicted probability value in each of the `num_bins` bins.
  pmean_observed = tf.math.unsorted_segment_sum(prob_y, bins, num_bins)
  tiny = np.finfo(dtype_util.as_numpy_dtype(logits.dtype)).tiny
  pmean_observed = pmean_observed / (
      tf.cast(tf.reduce_sum(event_bin_counts, axis=0), logits.dtype) + tiny)

  return event_bin_counts, pmean_observed
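# A hedged sketch of assembling the expected calibration error (ECE) from the
# two statistics returned above. This is the standard ECE formula; the helper
# name is hypothetical and the downstream code in the source library may
# organize this differently.
import tensorflow as tf

def _ece_from_bin_stats(event_bin_counts, pmean_observed):
  counts = tf.cast(tf.reduce_sum(event_bin_counts, axis=0), tf.float32)  # n_b
  # Per-bin accuracy, guarding empty bins against division by zero.
  acc = tf.cast(event_bin_counts[1], tf.float32) / tf.maximum(counts, 1.0)
  weights = counts / tf.reduce_sum(counts)
  # ECE = sum_b (n_b / n) * |acc_b - conf_b|
  return tf.reduce_sum(weights * tf.abs(acc - pmean_observed))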
def get_score(period_score, within_period_score):
  """Combine the period and periodicity scores."""
  within_period_score = tf.nn.sigmoid(within_period_score)[:, 0]
  per_frame_periods = tf.argmax(period_score, axis=-1) + 1
  pred_period_conf = tf.reduce_max(
      tf.nn.softmax(period_score, axis=-1), axis=-1)
  pred_period_conf = tf.where(
      tf.math.less(per_frame_periods, 3), 0.0, pred_period_conf)
  within_period_score *= pred_period_conf
  # Use tf.sqrt rather than np.sqrt so the value stays a Tensor (np.sqrt
  # would silently convert it to a numpy array).
  within_period_score = tf.sqrt(within_period_score)
  pred_score = tf.reduce_mean(within_period_score)
  return pred_score, within_period_score
def _compute_accuracy(labels: tf.Tensor, logits: tf.Tensor) -> tf.Tensor:
  """Computes classification accuracy given logits and dense labels.

  Args:
    labels: Integer Tensor of dense labels, shape [batch_size].
    logits: Tensor of shape [batch_size, num_classes].

  Returns:
    A scalar for the classification accuracy.
  """
  correct_prediction = tf.equal(
      tf.argmax(logits, 1, output_type=tf.int32), labels)
  return tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
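# A minimal standalone check of _compute_accuracy above (values are
# illustrative):
import tensorflow as tf

labels = tf.constant([2, 0, 1], dtype=tf.int32)
logits = tf.constant([[0.1, 0.2, 3.0],   # argmax 2 -> correct
                      [2.0, 0.1, 0.3],   # argmax 0 -> correct
                      [0.5, 0.1, 0.9]])  # argmax 2 -> wrong
print(_compute_accuracy(labels, logits).numpy())  # 2/3 ~= 0.6667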
def _decode(self,
            target_ids,
            target_mask,
            start_token_ids,
            encoder_output,
            encoder_mask,
            training=None):
  """Compute likelihood of target tokens under the model.

  Args:
    target_ids: int32 tensor with shape [batch_size, target_length] containing
      the target token ids.
    target_mask: self-attention bias for decoder attention layer.
      [batch_size, input_length]
    start_token_ids: int32 tensor of shape [batch_size] for first decoder
      input.
    encoder_output: Continuous representation of input sequence. Float tensor
      with shape [batch_size, input_length, hidden_size].
    encoder_mask: Float tensor with shape [batch_size, input_length].
    training: Boolean indicating whether the call is training or inference.

  Returns:
    A tuple of (log_probs, logits, pred_ids): the per-token output log-probs,
    the output logits, and the predicted (argmax) output ids.
  """
  # Prepare inputs to decoder layers by shifting targets, embedding ids,
  # adding positional encoding and applying dropout.
  input_ids = self.get_inputs_from_targets(target_ids, start_token_ids)

  input_embs = self.embeder(
      input_ids, self.params["max_decoder_length"], training=training)

  outputs = self.decoder(
      input_embs, target_mask, encoder_output, encoder_mask, training=training)

  logits = self.embeder.linear(outputs)
  output_ids = tf.cast(tf.argmax(logits, axis=-1), tf.int32)

  log_probs = -tf.nn.sparse_softmax_cross_entropy_with_logits(
      labels=target_ids, logits=logits)
  log_probs = tf.where(target_ids > 0, log_probs,
                       tf.zeros_like(log_probs, tf.float32))

  return (
      tf.identity(log_probs, name="log_probs"),
      tf.identity(logits, name="logits"),
      tf.cast(output_ids, tf.int32, name="pred_ids"),
  )
def _reduce_multiple_steps():
  """Perform `reduce_max` operation when `num_steps` > 1."""

  def forward_step(previous_step_pair, log_prob_observation):
    log_prob_previous = previous_step_pair[0]
    log_prob = (log_prob_previous[..., tf.newaxis] +
                self._log_trans +
                log_prob_observation[..., tf.newaxis, :])
    most_likely_given_successor = tf.argmax(log_prob, axis=-2)
    max_log_p_given_successor = tf.reduce_max(log_prob, axis=-2)
    return (max_log_p_given_successor, most_likely_given_successor)

  forward_log_probs, all_most_likely_given_successor = tf.scan(
      forward_step,
      observation_log_probs[1:],
      initializer=(log_prob,
                   tf.zeros(tf.shape(log_prob), dtype=tf.int64)),
      name="forward_log_probs")

  most_likely_end = tf.argmax(forward_log_probs[-1], axis=-1)

  # We require the operation that gives C from A and B where
  # C[i...j] = A[i...j, B[i...j]]
  # and A = most_likely_given_successor
  #     B = most_likely_successor.
  # tf.gather requires indices of known shape so instead we use
  # reduction with tf.one_hot(B) to pick out elements from B
  def backward_step(most_likely_successor, most_likely_given_successor):
    return tf.reduce_sum(
        (most_likely_given_successor *
         tf.one_hot(most_likely_successor, self._num_states, dtype=tf.int64)),
        axis=-1)

  backward_scan = tf.scan(
      backward_step,
      all_most_likely_given_successor,
      most_likely_end,
      reverse=True)

  most_likely_sequences = tf.concat([backward_scan, [most_likely_end]], axis=0)
  return distribution_util.move_dimension(most_likely_sequences, 0, -1)
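# A small standalone check of the one-hot reduction trick used in
# backward_step above (values are illustrative): it computes
# C[i] = A[i, B[i]] without tf.gather, which needs indices of known shape.
import tensorflow as tf

A = tf.constant([[3, 7], [4, 9]], dtype=tf.int64)  # most_likely_given_successor
B = tf.constant([1, 0], dtype=tf.int64)            # most_likely_successor
C = tf.reduce_sum(A * tf.one_hot(B, depth=2, dtype=tf.int64), axis=-1)
print(C.numpy())  # [7 4]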
def testMultiplicativeInverse(self):
  batch_size = 3
  vocab_size = 79
  length = 5
  inputs = np.random.randint(0, vocab_size - 1, size=(batch_size, length))
  one_hot_inputs = tf.one_hot(inputs, depth=vocab_size)

  one_hot_inv = ed.layers.utils.multiplicative_inverse(one_hot_inputs,
                                                       vocab_size)
  inv_inputs = tf.argmax(one_hot_inv, axis=-1)
  inputs_inv_inputs = tf.math.floormod(inputs * inv_inputs, vocab_size)
  self.assertAllEqual(inputs_inv_inputs, np.ones((batch_size, length)))
def update_state(self, labels, probabilities, **kwargs):
  """Updates this metric.

  Args:
    labels: Tensor of shape (N,) of class labels, one per example.
    probabilities: Tensor of shape (N,) or (N, k) of normalized probabilities
      associated with the True class in the binary case or with each of k
      classes in the multiclass case.
    **kwargs: Other potential keywords, which will be ignored by this method.
  """
  del kwargs  # unused
  labels = tf.squeeze(tf.convert_to_tensor(labels))
  probabilities = tf.convert_to_tensor(probabilities, self.dtype)

  if self.num_classes == 2:
    # Explicitly ensure probs have shape [n, 2] instead of [n, 1] or [n,].
    n = tf.shape(probabilities)[0]
    k = tf.size(probabilities) // n
    probabilities = tf.reshape(probabilities, [n, k])
    probabilities = tf.cond(
        k < 2,
        lambda: tf.concat([1. - probabilities, probabilities], axis=1),
        lambda: probabilities)

  pred_labels = tf.argmax(probabilities, axis=1)
  pred_probs = tf.reduce_max(probabilities, axis=1)
  correct_preds = tf.equal(pred_labels, tf.cast(labels, pred_labels.dtype))
  correct_preds = tf.cast(correct_preds, self.dtype)

  bin_indices = tf.histogram_fixed_width_bins(
      pred_probs, tf.constant([0., 1.], self.dtype), nbins=self.num_bins)
  batch_correct_sums = tf.math.unsorted_segment_sum(
      data=tf.cast(correct_preds, self.dtype),
      segment_ids=bin_indices,
      num_segments=self.num_bins)
  batch_prob_sums = tf.math.unsorted_segment_sum(
      data=pred_probs, segment_ids=bin_indices, num_segments=self.num_bins)
  batch_counts = tf.math.unsorted_segment_sum(
      data=tf.ones_like(bin_indices),
      segment_ids=bin_indices,
      num_segments=self.num_bins)
  batch_counts = tf.cast(batch_counts, self.dtype)
  self.correct_sums.assign_add(batch_correct_sums)
  self.prob_sums.assign_add(batch_prob_sums)
  self.counts.assign_add(batch_counts)
def sample(self, gray_cond, mode='argmax'):
  output = {}
  z_gray = self.encoder(gray_cond, training=False)
  if self.is_parallel_loss:
    z_logits = self.parallel_dense(z_gray)
    parallel_image = tf.argmax(z_logits, axis=-1, output_type=tf.int32)
    parallel_image = self.post_process_image(parallel_image)
    output['parallel'] = parallel_image

  image, proba = self.autoregressive_sample(z_gray=z_gray, mode=mode)
  output['auto_%s' % mode] = image
  output['proba'] = proba
  return output
def testDefaultAccuracy(self):
  # Model returns logits for 8 samples and 10 classes.
  n_sample, n_out = 8, 10
  model = self._create_mock_model(n_out=n_out)
  x = tf.ones((n_sample, 2))
  y = tf.ones((n_sample,), dtype=tf.int32)
  _, acc, total_samples = train_utils.cross_entropy_loss(
      model, (x, y), calculate_accuracy=True)
  self.assertEqual(total_samples, n_sample)

  logits = self.get_logits(n_sample, n_out)
  predictions = tf.cast(tf.argmax(logits, 1), y.dtype)
  acc_obj = tf.keras.metrics.Accuracy()
  acc_obj.update_state(tf.squeeze(y), predictions)
  true_acc = acc_obj.result().numpy()
  self.assertAllClose(acc, true_acc)
def __call__(self, x, y):
  h1_output = tf.argmax(self.h1(x), axis=1)
  h2_output = self.h2(x)

  h1_diff = h1_output - y
  h1_correct = (h1_diff == 0)
  _, x_support = tf.dynamic_partition(
      x, tf.dtypes.cast(h1_correct, tf.int32), 2)
  _, y_support = tf.dynamic_partition(
      y, tf.dtypes.cast(h1_correct, tf.int32), 2)

  h2_support_output = self.h2(x_support)
  dissonance = self.dissonance(h2_support_output, y_support)
  new_error_loss = self.nll_loss(y, h2_output) + self.lambda_c * dissonance
  return new_error_loss
def _tf_example_to_step_ds(tf_example: tf.train.Example,
                           episode_length: int) -> Dict[str, Any]:
  """Create an episode from a TF example."""

  # Parse tf.Example.
  def sequence_feature(shape, dtype=tf.float32):
    return tf.io.FixedLenFeature(shape=[episode_length] + shape, dtype=dtype)

  feature_description = {
      'episode_id': tf.io.FixedLenFeature([], tf.int64),
      'start_idx': tf.io.FixedLenFeature([], tf.int64),
      'episode_return': tf.io.FixedLenFeature([], tf.float32),
      'observations_pixels': sequence_feature([], tf.string),
      'observations_reward': sequence_feature([]),
      # Actions are one-hot arrays.
      'observations_action': sequence_feature([15]),
      'actions': sequence_feature([], tf.int64),
      'rewards': sequence_feature([]),
      'discounted_rewards': sequence_feature([]),
      'discounts': sequence_feature([]),
  }

  data = tf.io.parse_single_example(tf_example, feature_description)

  episode = {
      # Episode Metadata
      'episode_id': data['episode_id'],
      'episode_return': data['episode_return'],
      'steps': {
          'observation': {
              'pixels': data['observations_pixels'],
              'last_action': tf.argmax(
                  data['observations_action'], axis=1, output_type=tf.int64),
              'last_reward': data['observations_reward'],
          },
          'action': data['actions'],
          'reward': data['rewards'],
          'discount': data['discounts'],
          'is_first': [True] + [False] * (episode_length - 1),
          'is_terminal': [False] * episode_length,
      }
  }
  return episode
def test(model, dataset, step_counter):
  """Perform an evaluation of `model` on the examples from `dataset`."""
  avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
  accuracy = tf.keras.metrics.Accuracy('accuracy', dtype=tf.float32)

  for features in dataset:
    images, labels = get_image_labels(features, FLAGS.shuffled_labels)
    logits = model(images, labels, training=False, step=step_counter)
    loss_value, _ = loss(logits, labels)
    avg_loss(loss_value)
    accuracy(
        tf.argmax(logits, axis=1, output_type=tf.int64),
        tf.cast(labels, tf.int64))
  print('Test set: Average loss: %.4f, Accuracy: %.4f%%\n' %
        (avg_loss.result(), 100 * accuracy.result()))
  with tf.summary.always_record_summaries():
    tf.summary.scalar('loss', avg_loss.result(), step=step_counter)
    tf.summary.scalar('accuracy', accuracy.result(), step=step_counter)
def accuracy_function(real, pred):
  '''Custom accuracy function that masks padding tokens.

  Args:
    real: the real tokens
    pred: the predicted tokens

  Returns:
    tensor with the calculated accuracy
  '''
  accuracies = tf.equal(real, tf.argmax(pred, axis=2))

  mask = tf.math.logical_not(tf.math.equal(real, 0))
  accuracies = tf.math.logical_and(mask, accuracies)

  accuracies = tf.cast(accuracies, dtype=tf.float32)
  mask = tf.cast(mask, dtype=tf.float32)
  return tf.reduce_sum(accuracies) / tf.reduce_sum(mask)
def sample(self, gray_cond, inputs, mode='argmax'):
  output = dict()

  output['low_res_cond'] = tf.cast(inputs, dtype=tf.uint8)
  logits = self.upsampler(inputs, gray_cond, training=False)

  if mode == 'argmax':
    samples = tf.argmax(logits, axis=-1)
  elif mode == 'sample':
    batch_size, height, width, channels = logits.shape[:-1]
    logits = tf.reshape(logits, (batch_size * height * width * channels, -1))
    samples = tf.random.categorical(
        logits, num_samples=1, dtype=tf.int32)[:, 0]
    samples = tf.reshape(samples, (batch_size, height, width, channels))

  samples = tf.cast(samples, dtype=tf.uint8)
  output[f'high_res_{mode}'] = samples
  return output
def decode_tf(self, ids: tf.Tensor) -> tf.Tensor:
  """Detokenizes int32 Tensor to a string Scalar, up to EOS."""
  valid_ids = tf.constant(ids)

  if self.unk_id is not None:
    valid_ids = tf.where(
        tf.less(valid_ids, self._base_vocab_size), valid_ids, self.unk_id)

  if self.eos_id is not None:
    # Argmax always returns the first occurrence.
    first_eos = tf.argmax(tf.equal(valid_ids, self.eos_id))
    valid_ids = tf.cond(
        tf.logical_and(
            tf.equal(first_eos, 0), tf.not_equal(valid_ids[0], self.eos_id)),
        lambda: valid_ids,
        lambda: valid_ids[:first_eos])

  return self._decode_tf(valid_ids)
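# A behavior sketch for the EOS truncation above (eos_id=1 and the ids are
# illustrative): tf.argmax over a boolean vector returns the index of the
# first True, so the sequence is cut at the first EOS; the tf.cond guards the
# no-EOS case, where argmax returns 0 even though position 0 is not EOS.
import tensorflow as tf

ids = tf.constant([5, 8, 1, 9, 1], dtype=tf.int32)
first_eos = tf.argmax(tf.equal(ids, 1))  # -> 2, the first EOS position
print(ids[:first_eos].numpy())           # [5 8]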
def compute_loss_and_acc(
    self,
    rnn_output_logits: tf.Tensor,
    batch_features: Dict[str, tf.Tensor],
    batch_labels: Dict[str, tf.Tensor],
) -> LanguageModelLoss:
  """
  Args:
    rnn_output_logits: tf.float32 Tensor of shape [B, T, V], representing
      logits as computed by the language model.
    batch_features: Dict of batch features; "num_graphs_in_batch" is used
      here.
    batch_labels: Dict of batch labels; "target_value" holds the tf.int32
      target token sequence of shape [B, T].

  Returns:
    LanguageModelLoss tuple, containing both the average per-token loss as
    well as the number of (non-padding) token predictions and how many of
    those were correct.

  Note:
    We assume that the two inputs are shifted by one from each other, i.e.,
    that rnn_output_logits[i, t, :] are the logits for sample i after
    consuming input t; hence its target output is assumed to be
    target_token_seq[i, t+1].
  """
  target_token_seq = tf.cast(batch_labels["target_value"], tf.int32)
  num_graphs = tf.cast(batch_features["num_graphs_in_batch"], tf.float32)

  mask = tf.math.not_equal(
      target_token_seq[:, 1:],
      self.vocab_target.get_id_or_unk(self.vocab_target.get_pad()))
  num_tokens = tf.math.count_nonzero(mask)

  # Use logits for all but the last timestep; per the note above, the logits
  # at step t predict the target token at t + 1.
  prediction = tf.cast(tf.argmax(rnn_output_logits[:, :-1, :], 2), tf.int32)
  compared = tf.cast(
      tf.math.equal(target_token_seq[:, 1:], prediction),
      tf.int32) * tf.cast(mask, tf.int32)
  num_correct_tokens = tf.math.count_nonzero(compared)

  # Mask out CE loss for padding tokens.
  token_ce_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
      logits=tf.boolean_mask(rnn_output_logits[:, :-1, :], mask),
      labels=tf.boolean_mask(target_token_seq[:, 1:], mask))
  token_ce_loss = tf.reduce_sum(token_ce_loss)

  return LanguageModelLoss(token_ce_loss, num_tokens, num_correct_tokens)
def sample(self, gray_cond, bit_cond, mode='argmax'):
  output = dict()

  bit_cond_viz = base_utils.convert_bits(bit_cond, n_bits_in=3, n_bits_out=8)
  output['bit_cond'] = tf.cast(bit_cond_viz, dtype=tf.uint8)

  logits = self.upsampler(bit_cond, gray_cond, training=False)

  if mode == 'argmax':
    samples = tf.argmax(logits, axis=-1)
  elif mode == 'sample':
    batch_size, height, width, channels = logits.shape[:-1]
    logits = tf.reshape(logits, (batch_size * height * width * channels, -1))
    samples = tf.random.categorical(
        logits, num_samples=1, dtype=tf.int32)[:, 0]
    samples = tf.reshape(samples, (batch_size, height, width, channels))

  samples = tf.cast(samples, dtype=tf.uint8)
  output[f'bit_up_{mode}'] = samples
  return output
def compute_loss_and_acc(self, rnn_output_logits: tf.Tensor,
                         target_token_seq: tf.Tensor) -> LanguageModelLoss:
  """
  Args:
    rnn_output_logits: tf.float32 Tensor of shape [B, T, V], representing
      logits as computed by the language model.
    target_token_seq: tf.int32 Tensor of shape [B, T], representing the
      target token sequence.

  Returns:
    LanguageModelLoss tuple, containing both the average per-token loss as
    well as the number of (non-padding) token predictions and how many of
    those were correct.

  Note:
    We assume that the two inputs are shifted by one from each other, i.e.,
    that rnn_output_logits[i, t, :] are the logits for sample i after
    consuming input t; hence its target output is assumed to be
    target_token_seq[i, t+1].
  """
  # Compute CE loss for all but the last timestep; the mean is taken only
  # after padding positions are masked out below.
  token_ce_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
      labels=target_token_seq[:, 1:], logits=rnn_output_logits[:, :-1, :])

  # Compute number of (correct) predictions.
  pad_id = self.vocab.get_id_or_unk(self.vocab.get_pad())
  mask = tf.logical_not(tf.equal(target_token_seq, pad_id))[:, 1:]

  # Compute prediction correctness and drop padding by applying the mask.
  predictions_status = tf.boolean_mask(
      tf.equal(target_token_seq[:, 1:],
               # tf.argmax returns int64; cast to match the int32 targets.
               tf.cast(tf.argmax(rnn_output_logits[:, :-1], axis=2),
                       tf.int32)),
      mask)
  num_tokens = len(predictions_status)
  num_correct_tokens = tf.math.count_nonzero(
      predictions_status, dtype=tf.float32)

  # Mask out CE loss for padding tokens.
  token_ce_loss = tf.boolean_mask(token_ce_loss, mask)
  token_ce_loss = tf.reduce_mean(token_ce_loss)

  return LanguageModelLoss(token_ce_loss, num_tokens, num_correct_tokens)
def argmax(a, axis=None):
  """Returns the indices of the maximum values along an array axis.

  Args:
    a: array_like. Could be an ndarray, a Tensor or any object that can be
      converted to a Tensor using `tf.convert_to_tensor`.
    axis: Optional. The axis along which to compute argmax. If None, index of
      the max element in the flattened array is returned.

  Returns:
    An ndarray with the same shape as `a` with `axis` removed if not None.
    If `axis` is None, a scalar array is returned.
  """
  a = array_creation.asarray(a)
  if axis is None or utils.isscalar(a):
    # When axis is None or the array is a scalar, numpy flattens the array.
    a_t = tf.reshape(a.data, [-1])
  else:
    a_t = a.data
  return utils.tensor_to_ndarray(tf.argmax(input=a_t, axis=axis))
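# A minimal usage sketch for the numpy-compatible wrapper above, assuming the
# surrounding tf-numpy module's dependencies (array_creation, utils) are
# importable; it is called like numpy's np.argmax.
print(argmax([[1, 9, 3],
              [7, 2, 5]]))           # 1: index of the max in the flattened array
print(argmax([[1, 9, 3],
              [7, 2, 5]], axis=1))   # [1 0]: per-row argmax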
def _reshape_tensors(data):
  data['note_active_frame_indices'] = tf.reshape(
      data['note_active_frame_indices'], (-1, 128))
  data['note_active_velocities'] = tf.reshape(
      data['note_active_velocities'], (-1, 128))
  data['instrument_id'] = inst_vocab.lookup(data['instrument_id'])
  data['midi'] = tf.argmax(data['note_active_frame_indices'], axis=-1)
  data['f0_hz'] = data['f0_hz'][..., tf.newaxis]
  data['loudness_db'] = data['loudness_db'][..., tf.newaxis]
  onsets = tf.reduce_sum(
      tf.reshape(data['note_onsets'], (-1, 128)), axis=-1)
  data['onsets'] = tf.cast(onsets > 0, tf.int64)
  offsets = tf.reduce_sum(
      tf.reshape(data['note_offsets'], (-1, 128)), axis=-1)
  data['offsets'] = tf.cast(offsets > 0, tf.int64)
  return data
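# A small standalone check of the frame-to-MIDI reduction above (values are
# illustrative): each frame is a 128-way indicator over MIDI pitches, and
# argmax recovers the active pitch index per frame.
import tensorflow as tf

frames = tf.one_hot([60, 64, 67], depth=128)  # C4, E4, G4, one per frame
print(tf.argmax(frames, axis=-1).numpy())     # [60 64 67]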
def select_actor_action(self, env_output, agent_output):
  assert self._mode, 'mode must be set for selecting action in actor.'
  oracle_next_action = env_output.observation[
      streetview_constants.ORACLE_NEXT_ACTION]
  if self._mode == 'train':
    if self._loss_type == common.CE_LOSS:
      # This is teacher-forcing mode, so choose action same as oracle action.
      action_idx = oracle_next_action
    elif self._loss_type == common.AC_LOSS:
      action_idx = tfp.distributions.Categorical(
          logits=agent_output.policy_logits).sample()
  else:
    # In non-train modes, choose greedily.
    action_idx = tf.argmax(agent_output.policy_logits, axis=-1)

  # Return ActorAction and the action to be passed to the env step function.
  return common.ActorAction(
      chosen_action_idx=int(action_idx.numpy()),
      oracle_next_action_idx=int(
          oracle_next_action.numpy())), action_idx.numpy()