def keypoint_prune_outside_window(keypoints, window, scope=None):
    """Replaces every keypoint lying outside `window` with NaN.

    A keypoint is kept only when both of its coordinates lie inside the
    (inclusive) window bounds; otherwise both its y and x become NaN.
    Compare with clip_to_window, which clips out-of-window keypoints instead
    of pruning them.

    Args:
      keypoints: a tensor of shape [num_instances, num_keypoints, 2]
      window: a tensor of shape [4] representing the
        [y_min, x_min, y_max, x_max] window outside of which keypoints are
        pruned.
      scope: name scope; defaults to 'PruneOutsideWindow' when falsy.

    Returns:
      new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
    """
    with tf.name_scope(scope or 'PruneOutsideWindow'):
        y, x = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
        win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)

        y_inside = tf.logical_and(y >= win_y_min, y <= win_y_max)
        x_inside = tf.logical_and(x >= win_x_min, x <= win_x_max)
        inside = tf.logical_and(y_inside, x_inside)

        # The same validity mask is applied to both coordinates, so a keypoint
        # is either kept whole or replaced by (nan, nan).
        pruned = [
            tf.where(inside, coord, np.nan * tf.ones_like(coord))
            for coord in (y, x)
        ]
        return tf.concat(pruned, 2)
def __init__(self, batch_size, max_len):
    """Loads the pt->en TED corpus and builds the train/validation pipelines.

    Args:
      batch_size: batch size passed to `padded_batch` for both splits.
      max_len: examples where either sentence has more than `max_len`
        tokens are filtered out.
    """
    train_raw, valid_raw = tfds.load(
        "ted_hrlr_translate/pt_to_en",
        split=['train', 'validation'],
        as_supervised=True)

    self.tokenizer_pt, self.tokenizer_en = self.tokenize_dataset(train_raw)

    def within_max_len(x, y):
        # Keep a pair only when both encoded sentences fit in max_len.
        return tf.logical_and(tf.size(x) <= max_len, tf.size(y) <= max_len)

    # Training split: encode, filter, cache, then shuffle with a buffer that
    # spans the whole (filtered) dataset before batching.
    self.data_train = (
        train_raw.map(self.tf_encode).filter(within_max_len).cache())
    example_count = self.data_train.reduce(
        0, lambda count, _: count + 1).numpy()
    self.data_train = (
        self.data_train
        .shuffle(example_count)
        .padded_batch(batch_size)
        .prefetch(tf.data.experimental.AUTOTUNE))

    # Validation split: encode, filter and batch only.
    self.data_valid = (
        valid_raw.map(self.tf_encode)
        .filter(within_max_len)
        .padded_batch(batch_size))
def eval_step(model, x, fifth_embedding_1, fifth_embedding_2, label):
    """Scores one validation example against two candidate embeddings.

    The model's output embedding is compared (by matrix product) with both
    candidates. The prediction counts as correct when the candidate selected
    by `label` (0 for the first, 1 for the second) has the strictly larger
    similarity.

    Returns:
      A boolean tensor that is True when the prediction is correct.
    """
    assert x.shape[0] == 1, 'Only supports batch_size=1 for now'
    _, embedding = model(x, training=False)

    score_1 = tf.matmul(embedding, fifth_embedding_1, transpose_b=True)
    score_2 = tf.matmul(embedding, fifth_embedding_2, transpose_b=True)

    first_wins = tf.logical_and(
        tf.greater(score_1, score_2), tf.equal(label, 0))
    second_wins = tf.logical_and(
        tf.greater(score_2, score_1), tf.equal(label, 1))
    return tf.logical_or(tf.squeeze(first_wins), tf.squeeze(second_wins))
def _cdf(self, x):
    """Evaluates the CDF at `x` via the regularized incomplete beta function.

    Values of `x` outside [0, 1) are swapped for 0.5 before calling `betainc`
    so that the special function is only evaluated on in-range inputs. The
    result is then handed, together with the original `x`, to
    `extend_cdf_outside_support` with bounds low=0 and high=1.
    """
    conc1 = tf.convert_to_tensor(self.concentration1)
    conc0 = tf.convert_to_tensor(self.concentration0)

    inside = tf.logical_and(x >= 0, x < 1)
    safe_x = tf.where(inside, x, 0.5)
    cdf_inside = tfp_math.betainc(conc1, conc0, safe_x)

    return distribution_util.extend_cdf_outside_support(
        x, cdf_inside, low=0., high=1.)
def call(self, inputs, count_weights=None):
    """Encodes integer category indices as bin counts or multi-hot vectors.

    Args:
      inputs: a list, NumPy array, dense tensor or `tf.SparseTensor` of
        indices. Rank-1 inputs are expanded to rank 2 by adding axis 1.
      count_weights: optional weights forwarded to the bincount helper; only
        allowed when `self.output_mode` is `COUNT`.

    Returns:
      The result of `sparse_bincount` when `self.sparse` is truthy, otherwise
      the result of `dense_bincount`.

    Raises:
      ValueError: if `count_weights` is given while the output mode is not
        `COUNT`.
    """
    if isinstance(inputs, (list, np.ndarray)):
        inputs = tf.convert_to_tensor(inputs)
    if inputs.shape.rank == 1:
        inputs = tf.compat.v1.expand_dims(inputs, 1)

    if count_weights is not None and self.output_mode != COUNT:
        raise ValueError("count_weights is not used in "
                         "`output_mode='multi_hot'`. Please pass a single input.")

    out_depth = self.num_tokens
    multi_hot_output = (self.output_mode == MULTI_HOT)

    if isinstance(inputs, tf.SparseTensor):
        max_value = tf.reduce_max(inputs.values)
        min_value = tf.reduce_min(inputs.values)
    else:
        max_value = tf.reduce_max(inputs)
        min_value = tf.reduce_min(inputs)

    condition = tf.logical_and(
        tf.greater(tf.cast(out_depth, max_value.dtype), max_value),
        tf.greater_equal(min_value, tf.cast(0, min_value.dtype)))
    assertion = tf.Assert(condition, [
        "Input values must be in the range 0 <= values < num_tokens"
        " with num_tokens={}".format(out_depth)
    ])

    # The assert op must be a control dependency of the returned tensors;
    # otherwise nothing in a graph depends on it and the range check may
    # never run.
    with tf.control_dependencies([assertion]):
        if self.sparse:
            return sparse_bincount(inputs, out_depth, multi_hot_output,
                                   count_weights)
        else:
            return dense_bincount(inputs, out_depth, multi_hot_output,
                                  count_weights)
def call(self, inputs, count_weights=None):
    """Validates the index range of `inputs` and encodes them.

    Args:
      inputs: category indices; converted with `utils.ensure_tensor`.
      count_weights: optional weights, only accepted when
        `self.output_mode` is `COUNT`; converted to `self.compute_dtype`.

    Returns:
      The output of `utils.encode_categorical_inputs`, evaluated under a
      control dependency on the range assertion.

    Raises:
      ValueError: if `count_weights` is given while the output mode is not
        `COUNT`.
    """
    inputs = utils.ensure_tensor(inputs)

    if count_weights is not None:
        if self.output_mode != COUNT:
            raise ValueError(
                "`count_weights` is not used when `output_mode` is not `'count'`. "
                "Received `count_weights={}`.".format(count_weights))
        count_weights = utils.ensure_tensor(count_weights, self.compute_dtype)

    depth = self.num_tokens

    # For sparse inputs only the stored values are range-checked.
    values = inputs.values if isinstance(inputs, tf.SparseTensor) else inputs
    max_value = tf.reduce_max(values)
    min_value = tf.reduce_min(values)

    below_depth = tf.greater(tf.cast(depth, max_value.dtype), max_value)
    non_negative = tf.greater_equal(min_value, tf.cast(0, min_value.dtype))
    assertion = tf.Assert(tf.logical_and(below_depth, non_negative), [
        "Input values must be in the range 0 <= values < num_tokens"
        " with num_tokens={}".format(depth)
    ])

    with tf.control_dependencies([assertion]):
        return utils.encode_categorical_inputs(
            inputs,
            output_mode=self.output_mode,
            depth=depth,
            dtype=self.compute_dtype,
            sparse=self.sparse,
            count_weights=count_weights)
def validate(self, sentences):
    """Returns the masked mean next-token cross-entropy for `sentences`.

    Each position predicts the following token. Positions whose input token
    is the empty padding string or the "<E>" marker carry zero weight in the
    mean.
    """
    tokens, lookup_ids = self._tokens_to_lookup_ids(sentences)

    # Inputs are all tokens but the last; targets are shifted left by one.
    input_ids = lookup_ids[:, :-1]
    target_ids = lookup_ids[:, 1:]
    prefix_tokens = tokens[:, :-1]

    # Only positions holding a real, non-terminal token contribute to the
    # loss.
    is_real_token = tf.not_equal(prefix_tokens, "")
    is_not_end = tf.not_equal(prefix_tokens, "<E>")
    input_mask = tf.cast(tf.logical_and(is_real_token, is_not_end), tf.int32)

    lstm_output = self.model(input_ids)
    flat_output = tf.reshape(lstm_output, [-1, self._state_size])
    logits = self._logit_layer(flat_output)

    flat_targets = tf.reshape(target_ids, [-1])
    weights = tf.cast(tf.reshape(input_mask, [-1]), tf.float32)
    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=flat_targets, logits=logits)

    # Weighted mean over the positions selected by the mask.
    weighted_total = tf.reduce_sum(tf.multiply(losses, weights))
    return tf.math.divide(
        weighted_total, tf.reduce_sum(weights), name="final_validation_loss")
def generate_and_test_samples(seed):
    """Draws one batch of candidate samples and their acceptance mask.

    Returns:
      A pair `(candidate, good_sample_mask)`, where `candidate` is
      `3 * log(v)` when `log_space` is truthy and `v**3` otherwise, and
      `good_sample_mask` marks the candidates that pass the rejection test.
    """
    normal_seed, uniform_seed = samplers.split_seed(seed)

    x = samplers.normal(shape, dtype=internal_dtype, seed=normal_seed)
    # This implicitly broadcasts concentration up to sample shape.
    v = 1 + c * x
    logv = tf.math.log1p(c * x)
    v_cubed = v * v * v
    log_v_cubed = logv * 3

    u = samplers.uniform(shape, dtype=internal_dtype, seed=uniform_seed)

    # In [1], there is an 'inner' rejection sampling loop which checks that
    # v > 0 and generates a new normal sample if it's not, saving the rest of
    # the computations. Folding that check into the acceptance mask keeps the
    # code simple and gave a ~2x speedup for small concentrations on GPU, at
    # the cost of deviating slightly from Ref. [1].
    v_is_positive = v > 0.

    # In [1], the suggestion is to first try the cheap test
    # u < 1 - 0.331 * x2 * x2 and only fall back to the logarithmic test when
    # it fails. In batch mode the logarithms end up being computed for the
    # whole batch anyway, so only the full test is performed.
    x_squared = x * x
    log_bound = x_squared / 2. + d * (1 - v_cubed + log_v_cubed)
    good_sample_mask = tf.logical_and(tf.math.log(u) < log_bound,
                                      v_is_positive)

    candidate = log_v_cubed if log_space else v_cubed
    return candidate, good_sample_mask
def quadratic_with_spike(x):
    """Scaled quadratic in `x` with an infinite spike inside a box.

    The value is `sum(scales * (x - minimum)**2)` over the last axis, except
    where every coordinate along that axis lies strictly between 0.7 and 1.3,
    where the value is +inf.
    """
    weighted_sq = scales * tf.math.squared_difference(x, minimum)
    quadratic = tf.reduce_sum(weighted_sq, axis=-1)

    coord_in_hole = tf.logical_and((x > 0.7), (x < 1.3))
    square_hole = tf.reduce_all(coord_in_hole, axis=-1)

    infty = tf.constant(float('+inf'), dtype=quadratic.dtype)
    return tf.where(square_hole, infty, quadratic)
def filter_max_length(x, y, max_length=max_len):
    """Dataset predicate: True when both `x` and `y` have at most
    `max_length` elements."""
    x_size, y_size = tf.size(x), tf.size(y)
    return tf.logical_and(x_size <= max_length, y_size <= max_length)
def train(self, sentences):
    """Runs one gradient step on `sentences` and returns the loss.

    The sentences are tokenized, mapped to vocabulary ids, and fed through
    the embedding, RNN and logit layers to predict each next token. The
    masked mean cross-entropy is differentiated with respect to every
    variable watched by the tape, and each variable is updated in place by
    subtracting its raw gradient.
    """
    token_ids, token_values, token_dense_shape = self._tokenize(sentences)
    tokens_sparse = tf.sparse.SparseTensor(
        indices=token_ids,
        values=token_values,
        dense_shape=token_dense_shape)
    tokens = tf.sparse.to_dense(tokens_sparse, default_value="")

    ids_sparse = tf.sparse.SparseTensor(
        indices=tokens_sparse.indices,
        values=self._words_to_indices(tokens_sparse.values),
        dense_shape=tokens_sparse.dense_shape)
    lookup_ids = tf.sparse.to_dense(ids_sparse, default_value=0)

    # Inputs are all tokens but the last; targets are shifted left by one.
    input_ids = lookup_ids[:, :-1]
    target_ids = lookup_ids[:, 1:]
    prefix_tokens = tokens[:, :-1]

    # Only positions holding a real, non-terminal token contribute to the
    # loss.
    is_real_token = tf.not_equal(prefix_tokens, "")
    is_not_end = tf.not_equal(prefix_tokens, "<E>")
    input_mask = tf.cast(tf.logical_and(is_real_token, is_not_end), tf.int32)

    with tf.GradientTape() as tape:
        embedded = tf.nn.embedding_lookup(self._embeddings, input_ids)
        initial_state = self._lstm_cell.get_initial_state(embedded)
        lstm_output = self._rnn_layer(
            inputs=embedded, initial_state=initial_state)

        # Flatten the leading dimensions so every row is one token position.
        flat_output = tf.reshape(
            lstm_output, [-1, self._lstm_cell.output_size])
        logits = self._logit_layer(flat_output)

        flat_targets = tf.reshape(target_ids, [-1])
        weights = tf.cast(tf.reshape(input_mask, [-1]), tf.float32)
        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=flat_targets, logits=logits)

        # Weighted mean over the positions selected by the mask.
        final_loss = tf.math.divide(
            tf.reduce_sum(tf.multiply(losses, weights)),
            tf.reduce_sum(weights),
            name="final_loss")

    variables = tape.watched_variables()
    gradients = tape.gradient(final_loss, variables)
    for variable, gradient in zip(variables, gradients):
        variable.assign_sub(gradient)

    return final_loss
def _prob(self, x):
    """Evaluates the triangular density at `x`.

    Inside [low, high] the density is piecewise linear: it rises from 0 at
    `low` to 2 / (high - low) at `peak`, then falls back to 0 at `high`.
    Where `x` is below `low` or above `high` the result is 0.

    When `self.validate_args` is set, `x` is additionally asserted to lie in
    [low, high] before the density is computed.
    """
    if self.validate_args:
        with tf.control_dependencies([
            assert_util.assert_greater_equal(x, self.low),
            assert_util.assert_less_equal(x, self.high)
        ]):
            # Identity op so the assertions become dependencies of `x`.
            x = tf.identity(x)

    broadcast_x_to_high = _broadcast_to(x, [self.high])
    left_of_peak = tf.logical_and(
        broadcast_x_to_high > self.low, broadcast_x_to_high <= self.peak)

    interval_length = self.high - self.low
    # This is the pdf function when low <= x <= high. This looks like
    # a triangle, so we have to treat each line segment separately.
    result_inside_interval = tf.where(
        left_of_peak,
        # Line segment from (self.low, 0) to
        # (self.peak, 2 / (self.high - self.low)).
        2. * (x - self.low) / (interval_length * (self.peak - self.low)),
        # Line segment from (self.peak, 2 / (self.high - self.low)) to
        # (self.high, 0).
        2. * (self.high - x) / (interval_length * (self.high - self.peak)))

    broadcast_x_to_peak = _broadcast_to(x, [self.peak])
    outside_interval = tf.logical_or(
        broadcast_x_to_peak < self.low, broadcast_x_to_peak > self.high)

    broadcast_shape = tf.broadcast_dynamic_shape(
        tf.shape(input=x), self.batch_shape_tensor())

    # Zero density outside the support; the piecewise-linear value inside.
    return tf.where(
        outside_interval,
        tf.zeros(broadcast_shape, dtype=self.dtype),
        result_inside_interval)
def _cdf(self, x):
    """Evaluates the triangular CDF at `x`.

    The CDF is 0 where `x < low`, 1 where `x >= high`, and in between is one
    of two quadratics depending on whether `x` is left of the peak
    (`low < x <= peak`) or not.
    """
    broadcast_shape = tf.broadcast_dynamic_shape(
        tf.shape(input=x), self.batch_shape_tensor())

    broadcast_x_to_high = _broadcast_to(x, [self.high])
    left_of_peak = tf.logical_and(
        broadcast_x_to_high > self.low, broadcast_x_to_high <= self.peak)

    interval_length = self.high - self.low
    # Due to the PDF being not smooth at the peak, we have to treat each side
    # somewhat differently. The PDF is two line segments, and thus we get
    # quadratics here for the CDF.
    result_inside_interval = tf.where(
        left_of_peak,
        # (x - low) ** 2 / ((high - low) * (peak - low))
        tf.math.squared_difference(x, self.low) /
        (interval_length * (self.peak - self.low)),
        # 1 - (high - x) ** 2 / ((high - low) * (high - peak))
        1. - tf.math.squared_difference(self.high, x) /
        (interval_length * (self.high - self.peak)))

    broadcast_x_to_high_peak = _broadcast_to(broadcast_x_to_high, [self.peak])
    zeros = tf.zeros(broadcast_shape, dtype=self.dtype)
    # We now add that the left tail is 0 and the right tail is 1.
    result_if_not_big = tf.where(
        broadcast_x_to_high_peak < self.low, zeros, result_inside_interval)

    broadcast_x_to_peak_low = _broadcast_to(x, [self.low, self.peak])
    ones = tf.ones(broadcast_shape, dtype=self.dtype)
    return tf.where(
        broadcast_x_to_peak_low >= self.high, ones, result_if_not_big)
def shrink_ss(inputs_, theta_, q, return_index=False):
    """Shrinkage that leaves the largest-magnitude entries untouched.

    Entries whose magnitude exceeds both `theta_` and the (100 - q)-th
    percentile of magnitudes along axis 1 are passed through unchanged.
    Every other entry is sent through `shrink_free` with threshold `theta_`.

    Args:
      inputs_: tensor to shrink; percentiles are taken along axis 1.
      theta_: shrinkage threshold, also forwarded to `shrink_free`.
      q: percentage (0-100) of top-magnitude entries per row that are
        eligible to bypass the shrinkage.
      return_index: when True, also return the complementary mask.

    Returns:
      The shrunk tensor, or `(output, cindex_)` when `return_index` is True,
      where `cindex_` is 1 for entries that were shrunk and 0 for entries
      that bypassed the shrinkage.
    """
    abs_ = tf.abs(inputs_)
    thres_ = tfp.stats.percentile(abs_, 100.0 - q, axis=1, keepdims=True)

    # Entries that are above the threshold and in the top q% simultaneously
    # are selected into the support and bypass the shrinkage function.
    index_ = tf.logical_and(abs_ > theta_, abs_ > thres_)
    # Cast to the dtype of the magnitudes rather than a fixed float32 so the
    # mask can be multiplied with non-float32 inputs.
    index_ = tf.cast(index_, abs_.dtype)

    # Treat the mask as a constant: no gradient flows through the selection.
    index_ = tf.stop_gradient(index_)
    cindex_ = 1.0 - index_  # complementary index

    output = (tf.multiply(index_, inputs_) +
              shrink_free(tf.multiply(cindex_, inputs_), theta_))

    if return_index:
        return output, cindex_
    return output
def loop_body_fn(matrices, column):
    """Performs one loop iteration: updates `matrices` using column `column`.

    Relies on `degree`, `indices` and `polynomial` from the enclosing scope.

    Args:
      matrices: the current matrices tensor (loop variable).
      column: index of the column driving this iteration (loop variable).

    Returns:
      A tuple `(returned_matrices, column + 1)` to feed the next iteration.
    """
    # shape: (dim, log_num_results)
    column_values = tf.gather(matrices, [column], axis=1)

    # shape: (dim, log_num_results)
    should_be_updated = tf.logical_and(
        # Columns whose index is smaller than the degree of the primitive
        # polynomial are obtained from direction numbers and should not be
        # updated.
        tf.less_equal(tf.math.maximum(degree, column + 1), indices),
        # During a given iteration, only the next `n` columns (where `n` is the
        # degree of the primitive polynomial) should be updated.
        tf.less_equal(indices, column + degree))

    # shape: (dim, log_num_results)
    updated_matrices = tf.bitwise.bitwise_xor(
        # At offset `degree` from the current column the base value is the
        # right-shifted column; elsewhere it is the existing matrix entry.
        tf.where(
            tf.equal(indices, column + degree),
            tf.bitwise.right_shift(column_values, degree), matrices),
        utils.filter_tensor(column_values, polynomial,
                            column + degree - indices))

    # shape: (dim, log_num_results)
    # Entries outside the update window keep their previous value.
    returned_matrices = tf.where(should_be_updated, updated_matrices, matrices)

    return (returned_matrices, column + 1)
def __init__(self, batch_size, max_len):
    """Loads the pt->en TED corpus and builds the train/validation pipelines.

    Args:
      batch_size: batch size used by `padded_batch` for both splits.
      max_len: examples where either sentence has more than `max_len`
        tokens are filtered out.
    """
    dataset_name = 'ted_hrlr_translate/pt_to_en'
    self.data_train = tfds.load(dataset_name, split='train',
                                as_supervised=True)
    self.data_valid = tfds.load(dataset_name, split='validation',
                                as_supervised=True)

    self.tokenizer_pt, self.tokenizer_en = self.tokenize_dataset(
        self.data_train)
    self.batch_size = batch_size
    self.max_len = max_len

    def encode(pt, en):
        # tf_encode runs as eager Python, so it is wrapped in py_function.
        return tf.py_function(self.tf_encode, [pt, en],
                              (tf.int64, tf.int64))

    def short_enough(pt, en):
        return tf.logical_and(tf.size(pt) <= self.max_len,
                              tf.size(en) <= self.max_len)

    padded_shapes = ([None], [None])

    # Training split: encode, filter, cache, shuffle, batch, prefetch.
    self.data_train = (
        self.data_train
        .map(encode)
        .filter(short_enough)
        .cache()
        .shuffle(10000000)
        .padded_batch(self.batch_size, padded_shapes)
        .prefetch(tf.data.experimental.AUTOTUNE))

    # Validation split: encode, filter and batch only.
    self.data_valid = (
        self.data_valid
        .map(encode)
        .filter(short_enough)
        .padded_batch(self.batch_size, padded_shapes))
def filter_boxes(boxes, scores, image_shape, min_size_threshold):
    """Filter and remove boxes that are too small or fall outside the image.

    A box is kept when both its height and width exceed the size threshold
    and its center lies strictly inside the image.

    Args:
      boxes: a tensor whose last dimension is 4 representing the coordinates
        of boxes in ymin, xmin, ymax, xmax order.
      scores: a tensor whose shape is the same as tf.shape(boxes)[:-1]
        representing the original scores of the boxes.
      image_shape: a tensor whose shape is the same as, or `broadcastable`
        to `boxes` except the last dimension, which is 2, representing
        [height, width] of the scaled image. A Python list or tuple
        `(height, width)` is also accepted.
      min_size_threshold: a float representing the minimal box size in each
        side (w.r.t. the scaled image). Boxes whose sides are smaller than it
        will be filtered out. Values below 1.0 are raised to 1.0.

    Returns:
      filtered_boxes: a tensor whose shape is the same as `boxes` but with
        the position of the filtered boxes filled with 0.
      filtered_scores: a tensor whose shape is the same as `scores` but with
        the position of the filtered boxes filled with 0.

    Raises:
      ValueError: if the static last dimension of `boxes` is not 4.
    """
    last_dim = boxes.shape[-1]
    if last_dim != 4:
        # Plain `{}` formatting: the static dimension may be None (unknown),
        # which a `{:d}` specifier cannot render.
        raise ValueError(
            'boxes.shape[-1] is {}, but must be 4.'.format(last_dim))

    with tf.name_scope('filter_boxes'):
        if isinstance(image_shape, (list, tuple)):
            height, width = image_shape
        else:
            image_shape = tf.cast(image_shape, dtype=boxes.dtype)
            height = image_shape[..., 0]
            width = image_shape[..., 1]

        ymin = boxes[..., 0]
        xmin = boxes[..., 1]
        ymax = boxes[..., 2]
        xmax = boxes[..., 3]

        # Box sides use the inclusive (+1) pixel convention.
        h = ymax - ymin + 1.0
        w = xmax - xmin + 1.0
        yc = ymin + 0.5 * h
        xc = xmin + 0.5 * w

        min_size = tf.cast(
            tf.math.maximum(min_size_threshold, 1.0), dtype=boxes.dtype)

        filtered_size_mask = tf.math.logical_and(
            tf.math.greater(h, min_size), tf.math.greater(w, min_size))
        filtered_center_mask = tf.logical_and(
            tf.math.logical_and(tf.math.greater(yc, 0.0),
                                tf.math.less(yc, height)),
            tf.math.logical_and(tf.math.greater(xc, 0.0),
                                tf.math.less(xc, width)))
        filtered_mask = tf.math.logical_and(filtered_size_mask,
                                            filtered_center_mask)

        filtered_scores = tf.where(filtered_mask, scores,
                                   tf.zeros_like(scores))
        # Multiplying by the 0/1 mask zeroes out the rejected boxes.
        filtered_boxes = tf.cast(
            tf.expand_dims(filtered_mask, axis=-1), dtype=boxes.dtype) * boxes

        return filtered_boxes, filtered_scores
def get_short_note_loss_mask(note_mask, note_lengths, note_pitches,
                             min_length=40):
    """Builds a loss mask selecting notes shorter than `min_length`.

    A note counts as short when its length is below `min_length` and its
    pitch is greater than zero. `note_mask` is multiplied by that indicator
    (broadcast over a new middle axis) and summed over the last axis.
    """
    is_short = note_lengths < min_length
    is_pitched = note_pitches > 0.0
    short_notes = tf.cast(tf.logical_and(is_short, is_pitched), tf.float32)
    return tf.reduce_sum(note_mask * short_notes[:, None, :], axis=-1)
def is_cudnn_supported_inputs(mask, time_major):
    """Returns whether `mask` is right-padded with no fully masked sequence.

    When `time_major` is truthy the mask is transposed before the checks.
    """
    if time_major:
        mask = tf.transpose(mask)

    right_padded = is_sequence_right_padded(mask)
    has_empty_sequence = has_fully_masked_sequence(mask)
    return tf.logical_and(right_padded, tf.logical_not(has_empty_sequence))
def filter_max_length(x, y, max_len=max_len):
    """Returns True when neither `x` nor `y` has more than `max_len`
    elements."""
    return tf.logical_and(tf.size(x) <= max_len, tf.size(y) <= max_len)
def num_episodes(self) -> Union[int, tf.Tensor]:
    """Returns the number of complete episodes recorded so far.

    An episode is complete when its recorded start type is a FIRST step and
    its recorded end type is a LAST step. Episodes with ids
    0..`_last_episode_id` are inspected.

    Returns:
      A scalar int64 tensor holding the count.
    """
    all_episode_infos = self._episode_info_table.read(
        tf.range(self._last_episode_id + 1))
    full_episodes = tf.logical_and(
        StepType.is_first(all_episode_infos.episode_start_type),
        StepType.is_last(all_episode_infos.episode_end_type))
    # Sum directly in int64: accumulating in float32 stops being exact once
    # the count exceeds 2**24.
    return tf.reduce_sum(tf.cast(full_episodes, tf.int64))
def filter_max_length(x, y, max_length=max_len):
    """Keeps only examples whose two sentences both fit in `max_length`.

    Returns a boolean tensor that is False when either `x` or `y` has more
    than `max_length` tokens.
    """
    x_fits = tf.size(x) <= max_length
    y_fits = tf.size(y) <= max_length
    return tf.logical_and(x_fits, y_fits)
def get_non_empty_box_indices(boxes):
    """Returns the row indices of boxes with positive height and width.

    Height is column 2 minus column 0 and width is column 3 minus column 1
    of `boxes`.
    """
    height = boxes[:, 2] - boxes[:, 0]
    width = boxes[:, 3] - boxes[:, 1]
    # A box is non-empty only when both sides are strictly positive.
    is_non_empty = tf.logical_and(tf.greater(height, 0), tf.greater(width, 0))
    return tf.where(is_non_empty)[:, 0]
def is_absorbing(self):
    """Returns whether this is a last step with a discount of zero.

    Uses TensorFlow ops when `self.discount` is a tensor and NumPy ops
    otherwise.
    """
    if not tf.is_tensor(self.discount):
        return np.logical_and(np.equal(self.discount, 0), self.is_last())

    zero = tf.constant(0, self.discount.dtype)
    return tf.logical_and(tf.equal(self.discount, zero), self.is_last())
def filter_by_max_len(x, y, max_len=max_len):
    """Dataset filter predicate based on sentence length.

    Returns a boolean tensor that is True only when both the input sentence
    `x` and the target sentence `y` have at most `max_len` tokens.
    """
    sizes_ok = [tf.size(sentence) <= max_len for sentence in (x, y)]
    return tf.logical_and(*sizes_ok)
def _suppression_loop_body(boxes, iou_threshold, output_size, idx):
    """Process boxes in the range [idx*NMS_TILE_SIZE, (idx+1)*NMS_TILE_SIZE).

    The tile at position `idx` is first suppressed against the tiles with
    smaller index (via `_cross_suppression`), then against itself (via
    `_self_suppression`). Suppressed boxes in the tile are zeroed out and the
    tile is written back into `boxes`.

    Args:
      boxes: a tensor with a shape of [batch_size, anchors, 4].
      iou_threshold: a float representing the threshold for deciding whether
        boxes overlap too much with respect to IOU.
      output_size: an int32 tensor of size [batch_size]. Representing the
        number of selected boxes for each batch.
      idx: an integer scalar representing induction variable.

    Returns:
      boxes: updated boxes.
      iou_threshold: pass down iou_threshold to the next iteration.
      output_size: the updated output_size.
      idx: the updated induction variable.
    """
    num_tiles = tf.shape(boxes)[1] // NMS_TILE_SIZE
    batch_size = tf.shape(boxes)[0]

    # Iterates over tiles that can possibly suppress the current tile.
    box_slice = tf.slice(boxes, [0, idx * NMS_TILE_SIZE, 0],
                         [batch_size, NMS_TILE_SIZE, 4])
    _, box_slice, _, _ = tf.while_loop(
        lambda _boxes, _box_slice, _threshold, inner_idx: inner_idx < idx,
        _cross_suppression, [boxes, box_slice, iou_threshold,
                             tf.constant(0)])

    # Iterates over the current tile to compute self-suppression.
    iou = box_utils.bbox_overlap(box_slice, box_slice)
    # Strict upper-triangular mask (column index > row index): only pairs of
    # distinct boxes, each counted once, are kept.
    mask = tf.expand_dims(
        tf.reshape(tf.range(NMS_TILE_SIZE), [1, -1]) > tf.reshape(
            tf.range(NMS_TILE_SIZE), [-1, 1]), 0)
    # Zero every IOU entry that is masked out or below the threshold.
    iou *= tf.cast(tf.logical_and(mask, iou >= iou_threshold), iou.dtype)
    suppressed_iou, _, _ = tf.while_loop(
        lambda _iou, loop_condition, _iou_sum: loop_condition,
        _self_suppression,
        [iou, tf.constant(True),
         tf.reduce_sum(iou, [1, 2])])
    # A box is suppressed when any remaining IOU entry in its column is
    # positive; suppressed boxes are zeroed out.
    suppressed_box = tf.reduce_sum(suppressed_iou, 1) > 0
    box_slice *= tf.expand_dims(1.0 - tf.cast(suppressed_box, box_slice.dtype),
                                2)

    # Uses box_slice to update the input boxes.
    # `mask` is a one-hot selector over tiles: the tile at `idx` takes the
    # new box_slice, all other tiles keep their current boxes.
    mask = tf.reshape(
        tf.cast(tf.equal(tf.range(num_tiles), idx), boxes.dtype),
        [1, -1, 1, 1])
    boxes = tf.tile(tf.expand_dims(
        box_slice, [1]), [1, num_tiles, 1, 1]) * mask + tf.reshape(
            boxes, [batch_size, num_tiles, NMS_TILE_SIZE, 4]) * (1 - mask)
    boxes = tf.reshape(boxes, [batch_size, -1, 4])

    # Updates output_size.
    # Counts the boxes in this tile that still have a positive coordinate.
    output_size += tf.reduce_sum(
        tf.cast(tf.reduce_any(box_slice > 0, [2]), tf.int32), [1])
    return boxes, iou_threshold, output_size, idx + 1
def _get_rpn_samples(self, match_results):
    """Computes anchor labels.

    This function performs subsampling for foreground (fg) and background
    (bg) anchors.

    Args:
      match_results: A integer tensor with shape [N] representing the
        matching results of anchors.
        (1) match_results[i]>=0, meaning that column i is matched with row
            match_results[i].
        (2) match_results[i]=-1, meaning that column i is not matched.
        (3) match_results[i]=-2, meaning that column i is ignored.

    Returns:
      A tuple of three integer tensors of shape [N]:
      score_targets: 1 for a sampled positive anchor, 0 for a sampled
        negative anchor, -1 for every other anchor (don't care).
      positive_labels: 2 where the anchor is a sampled positive, else 0.
      negative_labels: 1 where the anchor is a sampled negative, else 0.
    """
    sampler = (
        balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
            positive_fraction=self._rpn_fg_fraction, is_static=False))

    # `is_candidate` covers both positive and negative anchors;
    # `is_positive` covers positives only. Ignored anchors are in neither.
    is_candidate = tf.greater(match_results, -2)
    is_positive = tf.greater(match_results, -1)
    samples = sampler.subsample(
        is_candidate, self._rpn_batch_size_per_im, is_positive)

    def _filled(value):
        return tf.constant(value, dtype=tf.int32, shape=match_results.shape)

    sampled_positive = tf.logical_and(samples, is_positive)
    positive_labels = tf.where(sampled_positive, _filled(2), _filled(0))

    sampled_negative = tf.logical_and(samples, tf.logical_not(is_positive))
    negative_labels = tf.where(sampled_negative, _filled(1), _filled(0))

    # Starting from -1 everywhere: positives become -1 + 2 = 1 and
    # negatives become -1 + 1 = 0.
    ignore_labels = tf.fill(match_results.shape, -1)
    score_targets = ignore_labels + positive_labels + negative_labels
    return (score_targets, positive_labels, negative_labels)
def length_norm_fn(log_probs_BxM, length_int):
    """Length-normalizes summed log probabilities and penalizes bad lengths.

    The log probabilities are divided by
    `((start + length) / (1 + start)) ** alpha`. `out_of_range_penalty` is
    then added wherever the length is below `min_len`, or above `max_len`
    when `max_len` is positive.
    """
    dtype = log_probs_BxM.dtype
    length_flt = tf.cast(length_int, dtype)
    norm_flt = tf.pow(((start + length_flt) / (1. + start)), alpha)
    log_probs_BxM /= norm_flt

    too_short_bool = tf.less(length_int, min_len)
    # A non-positive max_len disables the upper bound.
    exceeds_max_bool = tf.greater(length_int, max_len)
    too_long_bool = tf.logical_and(exceeds_max_bool, max_len > 0)

    penalized_flt = tf.cast(
        tf.logical_or(too_long_bool, too_short_bool), dtype)
    log_probs_BxM += out_of_range_penalty * penalized_flt
    return log_probs_BxM
def __init__(self, batch_size, max_len):
    """Loads the pt->en TED corpus and builds the train/validation pipelines.

    Args:
      batch_size: batch size passed to `padded_batch` for both splits.
      max_len: examples where either sentence has more than `max_len`
        tokens are filtered out.
    """
    self.data_train, self.data_valid = tfds.load(
        "ted_hrlr_translate/pt_to_en",
        split=['train', 'validation'],
        as_supervised=True
    )
    self.tokenizer_pt, self.tokenizer_en = self.tokenize_dataset(
        self.data_train
    )

    def within_max_len(x, y):
        return tf.logical_and(tf.size(x) <= max_len, tf.size(y) <= max_len)

    # Training split. The raw datasets live on `self`; there are no local
    # `data_train` / `data_valid` names.
    self.data_train = self.data_train.map(self.tf_encode)
    # Dataset.filter returns a new dataset, so the result must be kept for
    # the filter to take effect.
    self.data_train = self.data_train.filter(within_max_len)
    self.data_train = self.data_train.cache()
    # Shuffle with a buffer spanning the whole (filtered) dataset.
    data_size = sum(1 for __ in self.data_train)
    self.data_train = self.data_train.shuffle(data_size)
    self.data_train = self.data_train.padded_batch(batch_size)
    self.data_train = self.data_train.prefetch(
        tf.data.experimental.AUTOTUNE
    )

    # Validation split: encode, filter and batch only.
    self.data_valid = self.data_valid.map(self.tf_encode)
    self.data_valid = self.data_valid.filter(within_max_len)
    self.data_valid = self.data_valid.padded_batch(batch_size)
def call(self, inputs, count_weights=None):
    """Encodes integer category indices according to `self.output_mode`.

    Args:
      inputs: a list, NumPy array, dense tensor or sparse tensor of indices.
        Scalars are up-ranked to a single sample; in `ONE_HOT` mode a
        trailing axis is added unless the last dimension is already 1.
      count_weights: optional weights forwarded to the bincount helper; only
        allowed when `self.output_mode` is `COUNT`.

    Returns:
      The result of `sparse_bincount` when `self.sparse` is truthy, otherwise
      the result of `dense_bincount`, evaluated under a control dependency on
      the range assertion.

    Raises:
      ValueError: if the (possibly expanded) input has rank above 2, or if
        `count_weights` is given while the output mode is not `COUNT`.
    """
    if isinstance(inputs, (list, np.ndarray)):
        inputs = tf.convert_to_tensor(inputs)

    def _add_axis(tensor, axis):
        # Sparse tensors need the sparse-specific expand_dims op.
        expand = (tf.sparse.expand_dims if tf_utils.is_sparse(tensor)
                  else tf.compat.v1.expand_dims)
        return expand(tensor, axis)

    original_shape = inputs.shape
    # In all cases, a scalar input is up-ranked to a single sample.
    if inputs.shape.rank == 0:
        inputs = _add_axis(inputs, -1)
    # One hot up-ranks only if the final dimension is not already 1.
    if self.output_mode == ONE_HOT and inputs.shape[-1] != 1:
        inputs = _add_axis(inputs, -1)

    # TODO(b/190445202): remove output rank restriction.
    if inputs.shape.rank > 2:
        raise ValueError(
            "Received input shape {}, which would result in output rank {}. "
            "Currently only outputs up to rank 2 are supported.".format(
                original_shape, inputs.shape.rank))

    if count_weights is not None and self.output_mode != COUNT:
        raise ValueError(
            "`count_weights` is not used when `output_mode` is not `'count'`. "
            "Received `count_weights={}`.".format(count_weights))

    out_depth = self.num_tokens
    binary_output = self.output_mode in (MULTI_HOT, ONE_HOT)

    # For sparse inputs only the stored values are range-checked.
    values = inputs.values if isinstance(inputs, tf.SparseTensor) else inputs
    max_value = tf.reduce_max(values)
    min_value = tf.reduce_min(values)

    below_depth = tf.greater(tf.cast(out_depth, max_value.dtype), max_value)
    non_negative = tf.greater_equal(min_value, tf.cast(0, min_value.dtype))
    assertion = tf.Assert(tf.logical_and(below_depth, non_negative), [
        "Input values must be in the range 0 <= values < num_tokens"
        " with num_tokens={}".format(out_depth)
    ])

    with tf.control_dependencies([assertion]):
        bincount = sparse_bincount if self.sparse else dense_bincount
        return bincount(inputs, out_depth, binary_output, count_weights)