def prune_completely_outside_window(boxlist, window, scope=None):
  """Prunes bounding boxes that fall completely outside of the given window.

  The function clip_to_window prunes bounding boxes that fall
  completely outside the window, but also clips any bounding boxes that
  partially overflow. This function does not clip partially overflowing boxes.

  Args:
    boxlist: a BoxList holding M_in boxes.
    window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax]
      of the window.
    scope: name scope.

  Returns:
    pruned_boxlist: a new BoxList with all bounding boxes partially or fully
      in the window.
    valid_indices: a tensor with shape [M_out] indexing the valid bounding
      boxes in the input tensor.
  """
  with tf.name_scope(scope, 'PruneCompletelyOutsideWindow'):
    y_min, x_min, y_max, x_max = tf.split(
        value=boxlist.get(), num_or_size_splits=4, axis=1)
    win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
    coordinate_violations = tf.concat([
        tf.greater_equal(y_min, win_y_max), tf.greater_equal(x_min, win_x_max),
        tf.less_equal(y_max, win_y_min), tf.less_equal(x_max, win_x_min)
    ], 1)
    valid_indices = tf.reshape(
        tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))),
        [-1])
    return gather(boxlist, valid_indices), valid_indices
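# Usage sketch for the function above (assumes the BoxList class and the
# `gather` helper from the surrounding object-detection codebase are in
# scope). A box is pruned only when it has no overlap at all with the window;
# partially overflowing boxes are kept unclipped.
#
#   boxes = tf.constant([[0.5, 0.5, 1.5, 1.5],    # partially inside: kept
#                        [2.0, 2.0, 3.0, 3.0]])   # fully outside: pruned
#   window = tf.constant([0.0, 0.0, 1.0, 1.0])
#   pruned_boxlist, valid_indices = prune_completely_outside_window(
#       BoxList(boxes), window)
#   # valid_indices == [0]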
def loop_body_fn(matrices, column):
  # shape: (dim, log_num_results)
  column_values = tf.gather(matrices, [column], axis=1)

  # shape: (dim, log_num_results)
  should_be_updated = tf.logical_and(
      # Columns whose index is smaller than the degree of the primitive
      # polynomial are obtained from direction numbers and should not be
      # updated.
      tf.less_equal(tf.math.maximum(degree, column + 1), indices),
      # During a given iteration, only the next `n` columns (where `n` is the
      # degree of the primitive polynomial) should be updated.
      tf.less_equal(indices, column + degree))

  # shape: (dim, log_num_results)
  updated_matrices = tf.bitwise.bitwise_xor(
      tf.where(
          tf.equal(indices, column + degree),
          tf.bitwise.right_shift(column_values, degree), matrices),
      utils.filter_tensor(column_values, polynomial,
                          column + degree - indices))

  # shape: (dim, log_num_results)
  returned_matrices = tf.where(should_be_updated, updated_matrices, matrices)

  return (returned_matrices, column + 1)
def proposal(seed):
  """Proposal for log-concave rejection sampler."""
  (top_lobe_fractions_seed,
   exponential_samples_seed,
   top_selector_seed,
   rademacher_seed) = samplers.split_seed(
       seed, n=4, salt='log_concave_rejection_sampler_proposal')

  top_lobe_fractions = samplers.uniform(
      mode_shape, seed=top_lobe_fractions_seed, dtype=dtype)  # V in ref [1].
  top_offsets = top_lobe_fractions * top_width / mode_height

  exponential_samples = exponential_distribution.sample(
      mode_shape, seed=exponential_samples_seed)  # E in ref [1].
  exponential_height = (exponential_distribution.prob(exponential_samples) *
                        mode_height)
  exponential_offsets = (top_width + exponential_samples) / mode_height

  top_selector = samplers.uniform(
      mode_shape, seed=top_selector_seed, dtype=dtype)  # U in ref [1].
  on_top_mask = tf.less_equal(top_selector, top_fraction)
  unsigned_offsets = tf.where(on_top_mask, top_offsets, exponential_offsets)
  offsets = tf.round(
      tfp_random.rademacher(mode_shape, seed=rademacher_seed, dtype=dtype) *
      unsigned_offsets)

  potential_samples = mode + offsets
  envelope_height = tf.where(on_top_mask, mode_height, exponential_height)

  return potential_samples, envelope_height
def test_hz_to_midi_is_accurate(self):
  """Tests converting between MIDI values and their frequencies in hertz."""
  hz = np.linspace(0.0, 20000.0, 128)
  librosa_midi = librosa.hz_to_midi(hz)
  librosa_midi = tf.where(tf.less_equal(hz, 0.0), 0.0, librosa_midi)
  tf_midi = core.hz_to_midi(hz)
  self.assertAllClose(librosa_midi, tf_midi)
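# For reference, the standard conversion the test exercises (the textbook
# formula, not code from either library): midi = 69 + 12 * log2(hz / 440),
# with non-positive frequencies clamped to 0 to mirror the tf.where above.
# A minimal NumPy sketch:
def _hz_to_midi_reference(hz):
  hz = np.asarray(hz, dtype=np.float64)
  # Guard against log2(0); the clamp below overrides these entries anyway.
  midi = 69.0 + 12.0 * np.log2(np.maximum(hz, 1e-20) / 440.0)
  return np.where(hz <= 0.0, 0.0, midi)

assert np.isclose(_hz_to_midi_reference(440.0), 69.0)  # A4 is MIDI note 69.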
def to_4d(image: tf.Tensor) -> tf.Tensor:
  """Converts an input Tensor to 4 dimensions.

  4D image => [N, H, W, C] or [N, C, H, W]
  3D image => [1, H, W, C] or [1, C, H, W]
  2D image => [1, H, W, 1]

  Args:
    image: The 2/3/4D input tensor.

  Returns:
    A 4D image tensor.

  Raises:
    `TypeError` if `image` is not a 2/3/4D tensor.
  """
  shape = tf.shape(image)
  original_rank = tf.rank(image)
  left_pad = tf.cast(tf.less_equal(original_rank, 3), dtype=tf.int32)
  right_pad = tf.cast(tf.equal(original_rank, 2), dtype=tf.int32)
  new_shape = tf.concat(
      [
          tf.ones(shape=left_pad, dtype=tf.int32),
          shape,
          tf.ones(shape=right_pad, dtype=tf.int32),
      ],
      axis=0,
  )
  return tf.reshape(image, new_shape)
def from_4d(image: tf.Tensor, ndims: int) -> tf.Tensor:
  """Converts a 4D image back to `ndims` rank."""
  shape = tf.shape(image)
  begin = tf.cast(tf.less_equal(ndims, 3), dtype=tf.int32)
  end = 4 - tf.cast(tf.equal(ndims, 2), dtype=tf.int32)
  new_shape = shape[begin:end]
  return tf.reshape(image, new_shape)
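# A minimal round-trip sketch for the two helpers above (assumes
# `import tensorflow as tf`): expand a 2D image to 4D for a batched op, then
# restore the original rank.
_image = tf.random.uniform([28, 28])        # 2D: [H, W]
_batched = to_4d(_image)                    # -> [1, 28, 28, 1]
_restored = from_4d(_batched, ndims=2)      # -> [28, 28]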
def randomized_computation(seed):
  seed_stream = SeedStream(seed, 'batched_rejection_sampler')
  proposed_samples, proposed_values = proposal(seed_stream())
  good_samples_mask = tf.less_equal(
      proposed_values * tf.random.uniform(
          proposed_samples.shape, maxval=1., seed=seed_stream()),
      target(proposed_samples))
  return proposed_samples, good_samples_mask
def randomized_computation(seed):
  seed_stream = SeedStream(seed, 'batched_rejection_sampler')
  proposed_samples, proposed_values = proposal_fn(seed_stream())
  good_samples_mask = tf.less_equal(
      proposed_values * tf.random.uniform(
          prefer_static.shape(proposed_samples),
          seed=seed_stream(),
          dtype=dtype),
      target_fn(proposed_samples))
  return proposed_samples, good_samples_mask
def randomized_computation(seed):
  """Internal randomized computation."""
  proposal_seed, mask_seed = samplers.split_seed(
      seed, salt='batched_rejection_sampler')
  proposed_samples, proposed_values = proposal_fn(proposal_seed)
  good_samples_mask = tf.less_equal(
      proposed_values * samplers.uniform(
          prefer_static.shape(proposed_samples), seed=mask_seed, dtype=dtype),
      target_fn(proposed_samples))
  return proposed_samples, good_samples_mask
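# The acceptance test in the three variants above is standard rejection
# sampling: with U ~ Uniform(0, 1), a proposal x is kept iff
# U * envelope(x) <= target(x). A self-contained sketch with a constant
# envelope (names below are illustrative, not from the library):
import tensorflow as tf

def _rejection_sample_once(target_fn, envelope_height, proposals, seed=None):
  u = tf.random.uniform(tf.shape(proposals), dtype=proposals.dtype, seed=seed)
  return proposals, tf.less_equal(u * envelope_height, target_fn(proposals))

# Draw from the triangular density p(x) = 2x on [0, 1] using uniform
# proposals under the constant envelope 2; acceptance happens iff u <= x.
_proposals = tf.random.uniform([1024])
_samples, _accepted = _rejection_sample_once(lambda x: 2.0 * x, 2.0,
                                             _proposals)
_kept = tf.boolean_mask(_samples, _accepted)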
def maybe_update_alpha():
  """Maybe update the alpha param.

  Checks if global_step is between begin_compression_step and
  end_compression_step, and if the current training step is a compression
  step.

  Returns:
    Boolean tensor whether the training step is a compression step.
  """
  is_step_within_compression_range = tf.logical_and(
      tf.greater_equal(
          tf.cast(self._global_step, tf.int32),
          self._spec.begin_compression_step),
      tf.logical_or(
          tf.less_equal(
              tf.cast(self._global_step, tf.int32),
              self._spec.end_compression_step),
          tf.less(self._spec.end_compression_step, 0)))
  is_compression_step = tf.less_equal(
      tf.add(self.last_alpha_update_step, self._spec.compression_frequency),
      tf.cast(self._global_step, tf.int32))
  return tf.logical_and(is_step_within_compression_range, is_compression_step)
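# Plain-Python illustration of the predicate above (hypothetical values): a
# step is a compression step iff it lies within
# [begin_compression_step, end_compression_step] (an end < 0 means
# open-ended) and at least `compression_frequency` steps have elapsed since
# the last alpha update.
def _is_compression_step(step, begin, end, last_update, freq):
  within_range = step >= begin and (step <= end or end < 0)
  update_due = last_update + freq <= step
  return within_range and update_due

assert _is_compression_step(1500, begin=1000, end=-1, last_update=1490,
                            freq=10)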
def _sample_control_dependencies(self, x):
  assertions = []
  if not self.validate_args:
    return assertions
  loc = tf.convert_to_tensor(self.loc)
  scale = tf.convert_to_tensor(self.scale)
  concentration = tf.convert_to_tensor(self.concentration)
  assertions.append(assert_util.assert_greater_equal(
      x, loc, message='Sample must be greater than or equal to `loc`.'))
  assertions.append(assert_util.assert_equal(
      tf.logical_or(tf.greater_equal(concentration, 0),
                    tf.less_equal(x, loc - scale / concentration)),
      True,
      message=('If `concentration < 0`, sample must be less than or '
               'equal to `loc - scale / concentration`.'),
      summarize=100))
  return assertions
def body_fn(i, partial, outputs):
  """Body function for while_loop.

  Args:
    i: integer scalar
    partial: dictionary of Tensor (partially-constructed example)
    outputs: dictionary of TensorArray

  Returns:
    A triple containing the new values of the inputs.
  """
  can_append = True
  one_example = {}
  for k in keys:
    val = tf.cast(x[k][i], tf.int32)
    # Trim trailing padding (zeros).
    val = val[:tf.reduce_sum(tf.cast(tf.not_equal(val, 0), tf.int32))]
    one_example[k] = val
  for k in keys:
    can_append = tf.logical_and(
        can_append,
        tf.less_equal(
            tf.size(partial[k]) + tf.size(one_example[k]), length[k]))

  def false_fn():
    return write_packed_example(partial, outputs)

  def true_fn():
    return partial, outputs

  partial, outputs = tf.cond(can_append, true_fn, false_fn)
  new_partial = {}
  for k in keys:
    new_seq = one_example[k][:length[k]]
    new_seq_len = tf.size(new_seq)
    new_partial[k] = tf.concat([partial[k], new_seq], 0)
    if _annotate_key(k):
      new_partial[k + '_position'] = tf.concat([
          partial[k + '_position'],
          tf.range(new_seq_len, dtype=tf.int32)
      ], 0)
  partial = new_partial
  return i + 1, partial, outputs
def represent(self, waves):
  """Transform waves into a representation suited for the DS2 encoder."""
  waves = tf.squeeze(waves, -1)

  # Re-scale.
  waves = waves / (tf.reduce_max(tf.abs(waves), axis=1, keepdims=True) + 1e-5)
  waves *= 32767
  # To match PSF the following line should be uncommented. But it's not
  # supported by TPUs.
  # waves = tf.cast(tf.cast(waves, tf.int16), waves.dtype)  # Matching PSF.

  # Determine frame and step sizes.
  window_size = int(self.sample_freq * self.window_size)
  window_step = int(self.sample_freq * self.window_step)

  # Compute STFT.
  fft_window = tf.signal.hann_window(
      window_size, periodic=False, dtype=waves.dtype)
  fft_window = tf.reshape(fft_window, [1, 1, window_size])
  frames = tf.signal.frame(waves, window_size, window_step, True)
  # Do the slow DFT matmul because window size generally will not be a power
  # of 2.
  dft_w = scipy.linalg.dft(window_size).astype(np.complex64)
  stft = tf.matmul(tf.cast(fft_window * frames, dft_w.dtype), dft_w)
  mag = tf.abs(stft) / float(window_size)
  mag = tf.where(tf.less_equal(mag, 1e-30), tf.ones_like(mag) * 1e-30, mag)
  log_mag = 10. * tf.math.log(mag) / tf.math.log(10.)

  # Select features and standardize.
  features = log_mag[Ellipsis, :self.num_features]
  counts, means_ss, variance_ss, _ = tf.nn.sufficient_statistics(
      features, axes=[1, 2], keepdims=True)
  mean, variance = tf.nn.normalize_moments(counts, means_ss, variance_ss,
                                           None)
  features = (features - mean) / tf.sqrt(variance)
  return features
def get_shuffled_indices_and_labels(batch_size, num_samples,
                                    shuffle_fraction, num_steps):
  """Produce possibly shuffled indices and labels."""
  total_num_samples = batch_size * num_samples
  num_shuffled_examples = int(shuffle_fraction * total_num_samples)
  shuffle_labels = tf.random.shuffle(tf.cast(
      num_shuffled_examples * [1] +
      (total_num_samples - num_shuffled_examples) * [0], tf.int32))
  indices = tf.sort(random_choice_noreplace(
      total_num_samples, num_steps)[:, :5], axis=1)
  indices = randomly_reverse_indices(indices)
  shuffled_samples = tf.where(
      tf.less_equal(tf.random.uniform((total_num_samples, 1)), 0.5),
      tf.gather(indices, [1, 0, 3], axis=1),
      tf.gather(indices, [1, 4, 3], axis=1))
  ordered_samples = tf.gather(indices, [1, 2, 3], axis=1)
  indices = tf.where(tf.equal(tf.expand_dims(shuffle_labels, axis=-1), 1),
                     shuffled_samples, ordered_samples)
  return indices, shuffle_labels
def sample_and_preprocess(video, frame_labels, seq_len, name):
  """Samples frames and prepares them for training."""
  # STEP 0: DECIDE NUMBER OF FRAMES TO SAMPLE AND AUGMENTATION STRATEGY
  # ACCORDING TO MODE (i.e. train vs test/val).
  if CONFIG.MODE == 'train':
    augment = True
    offset = 1
    max_num_steps = CONFIG.TRAIN.NUM_FRAMES
    sampling_strategy = CONFIG.DATA.SAMPLING_STRATEGY
    sample_all = False
    sample_all_stride = None
  else:
    sampling_strategy = CONFIG.DATA.SAMPLING_STRATEGY
    augment = False
    offset = 1
    if sampling_strategy == 'all':
      sample_all = True
      sample_all_stride = 1
      max_num_steps = seq_len
    else:
      sample_all = False
      sample_all_stride = None
      max_num_steps = CONFIG.EVAL.NUM_FRAMES

  # Choose number of steps to sample.
  num_steps = max_num_steps

  # STEP 1: SAMPLE STEPS AND GET THEIR CONTEXT FRAMES FOR THE EMBEDDER.
  if sample_all:
    steps = tf.range(0, seq_len, sample_all_stride)
    chosen_steps = steps
  else:
    if sampling_strategy == 'stride':
      num_steps = tf.cast(num_steps, tf.int64)
      stride = tf.cast(seq_len / num_steps, tf.int64)
      if stride <= 0:
        stride = tf.cast(CONFIG.DATA.STRIDE, tf.int64)
      # Offset can be set between 0 and the maximum location from which we
      # can get total coverage of the video without having to pad.
      offset = tf.cast(offset, tf.int64)
      if offset is None:
        offset = tf.random.uniform(
            (), 0,
            tf.maximum(tf.cast(1, tf.int64), seq_len - stride * num_steps),
            dtype=tf.int64)
      # This handles sampling over shorter sequences by padding the last
      # frame many times. This is not ideal for the way alignment training
      # batches are created.
      cur_steps = tf.minimum(
          seq_len - 1,
          tf.range(offset, offset + num_steps * stride + 1, stride))
      cur_steps = cur_steps[:num_steps]
    elif sampling_strategy == 'random':
      # Sample a random offset less than a provided max offset. Among all
      # frames higher than the chosen offset, randomly sample num_steps
      # frames.
      check1 = tf.debugging.assert_greater_equal(
          seq_len,
          tf.cast(CONFIG.DATA.RANDOM_OFFSET, tf.int64),
          message='Random offset is more than sequence length.')
      check2 = tf.less_equal(
          tf.cast(num_steps, tf.int64),
          seq_len - tf.cast(CONFIG.DATA.RANDOM_OFFSET, tf.int64),
      )

      def _sample_random():
        with tf.control_dependencies([tf.identity(check1.outputs[0])]):
          offset = CONFIG.DATA.RANDOM_OFFSET
          steps = tf.random.shuffle(tf.range(offset, seq_len))
          steps = tf.gather(steps, tf.range(0, num_steps))
          # steps = tf.gather(steps, tf.range(0, seq_len))
          # steps = tf.gather(
          #     steps,
          #     tf.random.uniform(shape=(num_steps,), minval=offset,
          #                       maxval=seq_len, dtype=tf.int64))
          steps = tf.gather(
              steps, tf.nn.top_k(steps, k=num_steps).indices[::-1])
          steps = steps[:num_steps]
          return steps

      def _sample_all():
        return tf.range(0, num_steps, dtype=tf.int64)

      cur_steps = tf.cond(check2, _sample_random, _sample_all)
    else:
      raise ValueError(
          'Sampling strategy %s is unknown. Supported values are '
          'stride, random and all for now.' % sampling_strategy)

    # Get multiple context steps depending on config at selected steps.
    steps = tf.reshape(tf.map_fn(get_steps, cur_steps), [-1])
    # Make sure that frame indices are never less than 0 or greater than
    # (seq_len - 1).
    steps = tf.maximum(tf.cast(0, tf.int64), steps)
    steps = tf.minimum(seq_len - 1, steps)
    # Store chosen indices.
    chosen_steps = cur_steps

  # Select data based on steps.
  video = tf.gather(video, steps)
  if CONFIG.DATA.FRAME_LABELS:
    frame_labels = tf.gather(frame_labels, steps)
  # Decode the encoded JPEG images.
  video = tf.map_fn(
      tf.image.decode_jpeg,
      video,
      parallel_iterations=FLAGS.num_parallel_calls,
      dtype=tf.uint8)
  # Take images in range [0, 255] and normalize to [0, 1].
  video = tf.map_fn(
      normalize_input,
      video,
      parallel_iterations=FLAGS.num_parallel_calls,
      dtype=tf.float32)
  # Perform data-augmentation and return images in range [-1, 1].
  video = preprocess_input(video, augment)
  if CONFIG.MODE == 'train':
    # Should be similar to the shape of steps.
    shape_all_steps = CONFIG.DATA.NUM_CONTEXT_FRAMES * max_num_steps
    video.set_shape(
        [shape_all_steps, CONFIG.IMAGE_SIZE, CONFIG.IMAGE_SIZE, 3])
  if CONFIG.MODE == 'train' and CONFIG.DATA.FRAME_LABELS:
    shape_all_steps = CONFIG.DATA.NUM_CONTEXT_FRAMES * max_num_steps
    frame_labels.set_shape([shape_all_steps])

  return {
      'frames': video,
      'chosen_steps': chosen_steps,
      'seq_lens': seq_len,
      'frame_labels': frame_labels,
      'name': name,
      'num_steps': num_steps,
  }
def find_interval_index(query_xs,
                        interval_lower_xs,
                        last_interval_is_closed=False,
                        dtype=None,
                        name=None):
  """Function to find the index of the interval where query points lie.

  Given a list of adjacent half-open intervals [x_0, x_1), [x_1, x_2), ...,
  [x_{n-1}, x_n), [x_n, inf), described by a list [x_0, x_1, ..., x_{n-1},
  x_n], returns the index where the input query points lie. If x >= x_n, n is
  returned, and if x < x_0, -1 is returned. If `last_interval_is_closed` is
  set to `True`, the last interval [x_{n-1}, x_n] is interpreted as closed
  (including x_n).

  #### Example

  ```python
  interval_lower_xs = [0.25, 0.5, 1.0, 2.0, 3.0]
  query_xs = [0.25, 3.0, 5.0, 0.0, 0.5, 0.8]
  result = find_interval_index(query_xs, interval_lower_xs)
  # result == [0, 4, 4, -1, 1, 1]
  ```

  Args:
    query_xs: Rank 1 real `Tensor` of any size, the list of x coordinates for
      which the interval index is to be found.
    interval_lower_xs: Rank 1 `Tensor` of the same dtype as `query_xs`. The
      values x_0, ..., x_n that define the interval starts. The values must
      be strictly increasing.
    last_interval_is_closed: If set to `True`, the last interval is
      interpreted as closed.
    dtype: Optional `tf.DType`. If supplied, the dtype for `query_xs` and
      `interval_lower_xs`. Default value: None which maps to the default
      dtype inferred by TensorFlow (float32).
    name: Optional name of the operation.

  Returns:
    A tensor that matches the shape of `query_xs` with dtype=int32 containing
    the indices of the intervals containing query points. `-1` means the
    query point lies before all intervals and `n-1` means that the point lies
    in the last half-open interval (if `last_interval_is_closed` is `False`)
    or that the point lies to the right of all intervals (if
    `last_interval_is_closed` is `True`).
  """
  with tf.compat.v1.name_scope(
      name,
      default_name='find_interval_index',
      values=[query_xs, interval_lower_xs, last_interval_is_closed]):
    # TODO(b/138988951): add ability to validate that intervals are
    # increasing.
    # TODO(b/138988951): validate that if last_interval_is_closed, input size
    # must be > 1.
    query_xs = tf.convert_to_tensor(query_xs, dtype=dtype)
    interval_lower_xs = tf.convert_to_tensor(interval_lower_xs, dtype=dtype)

    # Result assuming that the last interval is half-open.
    indices = tf.searchsorted(interval_lower_xs, query_xs, side='right') - 1

    # Handling the branch if the last interval is closed.
    last_index = tf.shape(interval_lower_xs)[-1] - 1
    last_x = tf.gather(interval_lower_xs, [last_index], axis=-1)
    # A cell in `should_cap` is true iff the index at that cell is the last
    # index and the query x <= the right boundary of the last interval.
    should_cap = tf.logical_and(
        tf.equal(indices, last_index), tf.less_equal(query_xs, last_x))

    # Cap to last_index if the query x is not in the last interval,
    # otherwise cap to last_index - 1.
    caps = last_index - tf.cast(should_cap, dtype=tf.dtypes.int32)

    return tf.compat.v1.where(last_interval_is_closed,
                              tf.minimum(indices, caps), indices)
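# Complementing the docstring example, the closed-interval variant caps a
# query that equals the last boundary into the final interval (hand-derived
# from the logic above, not taken from the library's docs):
#
#   result = find_interval_index(
#       [0.25, 3.0, 5.0, 0.0, 0.5, 0.8], [0.25, 0.5, 1.0, 2.0, 3.0],
#       last_interval_is_closed=True)
#   # result == [0, 3, 4, -1, 1, 1]: the query 3.0 now falls in [2.0, 3.0].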
def box_matching(boxes, gt_boxes, gt_classes):
  """Match boxes to groundtruth boxes.

  Given the proposal boxes and the groundtruth boxes and classes, perform the
  groundtruth matching by taking the argmax of the IoU between boxes and
  groundtruth boxes.

  Args:
    boxes: a tensor of shape of [batch_size, N, 4] representing the box
      coordinates to be matched to groundtruth boxes.
    gt_boxes: a tensor of shape of [batch_size, MAX_INSTANCES, 4]
      representing the groundtruth box coordinates. It is padded with -1s to
      indicate the invalid boxes.
    gt_classes: [batch_size, MAX_INSTANCES] representing the groundtruth box
      classes. It is padded with -1s to indicate the invalid classes.

  Returns:
    matched_gt_boxes: a tensor of shape of [batch_size, N, 4], representing
      the matched groundtruth box coordinates for each input box. If the box
      does not overlap with any groundtruth boxes, the matched boxes of it
      will be set to all 0s.
    matched_gt_classes: a tensor of shape of [batch_size, N], representing
      the matched groundtruth classes for each input box. If the box does not
      overlap with any groundtruth boxes, the matched box classes of it will
      be set to 0, which corresponds to the background class.
    matched_gt_indices: a tensor of shape of [batch_size, N], representing
      the indices of the matched groundtruth boxes in the original gt_boxes
      tensor. If the box does not overlap with any groundtruth boxes, the
      index of the matched groundtruth will be set to -1.
    matched_iou: a tensor of shape of [batch_size, N], representing the IoU
      between the box and its matched groundtruth box. The matched IoU is the
      maximum IoU of the box and all the groundtruth boxes.
    iou: a tensor of shape of [batch_size, N, K], representing the IoU matrix
      between boxes and the groundtruth boxes. The IoU between a box and the
      invalid groundtruth boxes whose coordinates are [-1, -1, -1, -1] is -1.
  """
  # Compute IoU between boxes and gt_boxes.
  # iou <- [batch_size, N, K]
  iou = box_utils.bbox_overlap(boxes, gt_boxes)

  # max_iou <- [batch_size, N]
  # 0.0 -> no match to gt, or -1.0 match to no gt
  matched_iou = tf.reduce_max(iou, axis=-1)

  # background_box_mask <- bool, [batch_size, N]
  background_box_mask = tf.less_equal(matched_iou, 0.0)

  argmax_iou_indices = tf.argmax(iou, axis=-1, output_type=tf.int32)

  argmax_iou_indices_shape = tf.shape(argmax_iou_indices)
  batch_indices = (
      tf.expand_dims(tf.range(argmax_iou_indices_shape[0]), axis=-1) *
      tf.ones([1, argmax_iou_indices_shape[-1]], dtype=tf.int32))
  gather_nd_indices = tf.stack([batch_indices, argmax_iou_indices], axis=-1)

  matched_gt_boxes = tf.gather_nd(gt_boxes, gather_nd_indices)
  matched_gt_boxes = tf.where(
      tf.tile(tf.expand_dims(background_box_mask, axis=-1), [1, 1, 4]),
      tf.zeros_like(matched_gt_boxes, dtype=tf.float32), matched_gt_boxes)

  matched_gt_classes = tf.gather_nd(gt_classes, gather_nd_indices)
  matched_gt_classes = tf.where(background_box_mask,
                                tf.zeros_like(matched_gt_classes),
                                matched_gt_classes)

  matched_gt_indices = tf.where(background_box_mask,
                                -tf.ones_like(argmax_iou_indices),
                                argmax_iou_indices)

  return (matched_gt_boxes, matched_gt_classes, matched_gt_indices,
          matched_iou, iou)
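# A minimal sketch of the background rule above (hedged: relies on
# box_utils.bbox_overlap returning IoU -1 against padded groundtruth, as the
# docstring states). A proposal whose best IoU is <= 0 is matched to
# background: class 0, index -1, and an all-zeros box.
#
#   boxes = tf.constant([[[0.0, 0.0, 1.0, 1.0],     # overlaps gt box 0
#                         [5.0, 5.0, 6.0, 6.0]]])   # overlaps nothing
#   gt_boxes = tf.constant([[[0.0, 0.0, 1.0, 1.0],
#                            [-1.0, -1.0, -1.0, -1.0]]])  # padded entry
#   gt_classes = tf.constant([[3, -1]])
#   (matched_boxes, matched_classes, matched_indices,
#    matched_iou, iou) = box_matching(boxes, gt_boxes, gt_classes)
#   # matched_classes == [[3, 0]]; matched_indices == [[0, -1]]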
def pack_batch(x: Mapping[str, tf.Tensor]) -> Mapping[str, tf.Tensor]:
  """Internal function to map over.

  Consumes a batch of input examples and produces a variable number of output
  examples.

  Args:
    x: a batch of examples, as a mapping from feature name to batched tensor.

  Returns:
    a mapping from feature name to packed tensors, including the derived
    "*_positions" and "*_segment_ids" features.
  """
  keys = list(feature_lengths)
  partial = empty_example.copy()
  first_key, *_ = keys
  dynamic_batch_size = tf.shape(x[first_key])[0]
  outputs = {}
  for k in keys:
    outputs[k] = tf.TensorArray(
        tf.int32, size=0, dynamic_size=True,
        element_shape=[feature_lengths[k]])
    outputs[k + "_positions"] = tf.TensorArray(
        tf.int32, size=0, dynamic_size=True,
        element_shape=[feature_lengths[k]])

  for i in tf.range(0, dynamic_batch_size):
    tf.autograph.experimental.set_loop_options(
        shape_invariants=[
            (partial, {k: tf.TensorShape([None]) for k in keys_etc}),
            (outputs, {k: tf.TensorShape(None) for k in keys_etc})
        ])

    can_append = True
    one_example = {}
    for k in keys:
      val = tf.cast(x[k][i], tf.int32)
      # Trim trailing padding (zeros).
      val = val[:tf.reduce_sum(tf.cast(tf.not_equal(val, 0), tf.int32))]
      one_example[k] = val
    for k in keys:
      can_append = tf.logical_and(
          can_append,
          tf.less_equal(
              tf.size(partial[k]) + tf.size(one_example[k]),
              feature_lengths[k]))

    if not can_append:
      partial, outputs = _write_packed_example(partial, outputs)

    new_partial = {}
    for k in keys:
      new_seq = one_example[k][:feature_lengths[k]]
      new_seq_len = tf.size(new_seq)
      new_partial[k] = tf.concat([partial[k], new_seq], 0)
      new_partial[k + "_positions"] = tf.concat([
          partial[k + "_positions"],
          tf.range(new_seq_len, dtype=tf.int32)
      ], 0)
    partial = new_partial

  partial, outputs = _write_packed_example(partial, outputs)
  packed = {k: outputs[k].stack() for k in keys_etc}
  for k in keys:
    packed[k + "_segment_ids"] = (
        tf.cumsum(
            tf.cast(tf.equal(packed[k + "_positions"], 0), tf.int32),
            axis=1) *
        tf.cast(tf.not_equal(packed[k], 0), tf.int32))
  return packed
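# Worked illustration of the packing scheme (hand-derived, assuming the
# `_write_packed_example` helper zero-pads each row to the feature length):
# packing two already-padded rows [3, 5, 0, 0] and [2, 0, 0, 0] with
# feature_lengths[k] == 4 yields
#   packed[k]                  == [3, 5, 2, 0]
#   packed[k + "_positions"]   == [0, 1, 0, 0]
#   packed[k + "_segment_ids"] == [1, 1, 2, 0]
# i.e. positions restart at 0 for each packed example and segment ids number
# the examples within the row, with 0 marking padding.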
def sample_and_preprocess(video,
                          labels,
                          seq_label,
                          seq_len,
                          name,
                          num_steps,
                          augment,
                          sample_all=False,
                          sample_all_stride=1,
                          add_shape=False):
  """Samples frames and prepares them for training."""
  if sample_all:
    # When dealing with very long videos we can choose to sub-sample to fit
    # data in memory. But be aware this also evaluates over a subset of
    # frames. Subsampling the validation set videos when reporting
    # performance is not recommended.
    steps = tf.range(0, seq_len, sample_all_stride)
    seq_len = tf.shape(steps)[0]
    chosen_steps = steps
  else:
    stride = CONFIG.DATA.STRIDE
    sampling_strategy = CONFIG.DATA.SAMPLING_STRATEGY
    # TODO(debidatta): More flexible sampling.
    if sampling_strategy == 'stride':
      # Offset can be set between 0 and the maximum location from which we
      # can get total coverage of the video without having to pad. This
      # handles sampling over longer sequences.
      offset = tf.random.uniform(
          (), 0,
          tf.maximum(tf.cast(1, tf.int64), seq_len - stride * num_steps),
          dtype=tf.int64)
      # This handles sampling over shorter sequences by padding the last
      # frame many times. This is not ideal for the way alignment training
      # batches are created.
      steps = tf.minimum(
          seq_len - 1,
          tf.range(offset, offset + num_steps * stride + 1, stride))
      steps = steps[:num_steps]
    elif sampling_strategy == 'offset_uniform':
      # Sample a random offset less than a provided max offset. Among all
      # frames higher than the chosen offset, randomly sample num_steps
      # frames.
      check1 = tf.debugging.assert_greater_equal(
          seq_len,
          tf.cast(CONFIG.DATA.RANDOM_OFFSET, tf.int64),
          message='Random offset is more than sequence length.')
      check2 = tf.less_equal(
          tf.cast(num_steps, tf.int64),
          seq_len - tf.cast(CONFIG.DATA.RANDOM_OFFSET, tf.int64),
      )

      def _sample_random():
        with tf.control_dependencies([tf.identity(check1.outputs[0])]):
          offset = CONFIG.DATA.RANDOM_OFFSET
          steps = tf.random.shuffle(tf.range(offset, seq_len))
          steps = tf.gather(steps, tf.range(0, num_steps))
          steps = tf.gather(
              steps, tf.nn.top_k(steps, k=num_steps).indices[::-1])
          return steps

      def _sample_all():
        return tf.range(0, num_steps, dtype=tf.int64)

      steps = tf.cond(check2, _sample_random, _sample_all)
    else:
      raise ValueError(
          'Sampling strategy %s is unknown. Supported values are '
          'stride and offset_uniform.' % sampling_strategy)

  if not sample_all and 'tcn' in CONFIG.TRAINING_ALGO:
    pos_window = CONFIG.TCN.POSITIVE_WINDOW
    # pylint: disable=g-long-lambda
    pos_steps = tf.map_fn(
        lambda step: tf.random.uniform(
            (), minval=step - pos_window, maxval=step, dtype=tf.int64),
        steps)
    # pylint: enable=g-long-lambda
    steps = tf.stack([pos_steps, steps])
    steps = tf.reshape(tf.transpose(steps), (-1,))

  # Store chosen indices.
  chosen_steps = steps
  # Get multiple context steps depending on config at selected steps.
  steps = tf.reshape(tf.map_fn(get_steps, steps), [-1])
  steps = tf.maximum(tf.cast(0, tf.int64), steps)
  steps = tf.minimum(seq_len - 1, steps)

  shape_all_steps = CONFIG.DATA.NUM_STEPS * num_steps
  if not sample_all and 'tcn' in CONFIG.TRAINING_ALGO:
    shape_all_steps *= 2

  # Select data based on steps.
  video = tf.gather(video, steps)
  # Decode the encoded JPEG images.
  video = tf.map_fn(
      tf.image.decode_jpeg,
      video,
      parallel_iterations=FLAGS.num_parallel_calls,
      dtype=tf.uint8)
  # Take images in range [0, 255] and normalize to [0, 1].
  video = tf.map_fn(
      normalize_input,
      video,
      parallel_iterations=FLAGS.num_parallel_calls,
      dtype=tf.float32)
  # Perform data-augmentation and return images in range [-1, 1].
  video = preprocess_input(video, augment)
  if add_shape:
    video.set_shape(
        [shape_all_steps, CONFIG.IMAGE_SIZE, CONFIG.IMAGE_SIZE, 3])

  if CONFIG.DATA.FRAME_LABELS:
    labels = tf.gather(labels, steps)
    if add_shape:
      labels.set_shape([shape_all_steps])

  return {
      'frames': video,
      'frame_labels': labels,
      'chosen_steps': chosen_steps,
      'seq_lens': seq_len,
      'seq_labels': seq_label,
      'name': name
  }
def _static_subsample(self, indicator, batch_size, labels):
  """Returns subsampled minibatch.

  Args:
    indicator: boolean tensor of shape [N] whose True entries can be sampled.
      N should be a compile-time constant.
    batch_size: desired batch size. This scalar cannot be None.
    labels: boolean tensor of shape [N] denoting positive (=True) and
      negative (=False) examples. N should be a compile-time constant.

  Returns:
    sampled_idx_indicator: boolean tensor of shape [N], True for entries
      which are sampled. It ensures the length of output of the subsample is
      always batch_size, even when number of examples set to True in
      indicator is less than batch_size.

  Raises:
    ValueError: if labels and indicator are not 1D boolean tensors.
  """
  # Check if indicator and labels have a static size.
  if not indicator.shape.is_fully_defined():
    raise ValueError(
        'indicator must be static in shape when is_static is True')
  if not labels.shape.is_fully_defined():
    raise ValueError('labels must be static in shape when is_static is True')
  if not isinstance(batch_size, int):
    raise ValueError('batch_size has to be an integer when is_static is '
                     'True.')

  input_length = tf.shape(input=indicator)[0]

  # Set the number of examples set True in indicator to be at least
  # batch_size.
  num_true_sampled = tf.reduce_sum(
      input_tensor=tf.cast(indicator, tf.float32))
  additional_false_sample = tf.less_equal(
      tf.cumsum(tf.cast(tf.logical_not(indicator), tf.float32)),
      batch_size - num_true_sampled)
  indicator = tf.logical_or(indicator, additional_false_sample)

  # Shuffle indicator and label. Need to store the permutation to restore the
  # order post sampling.
  permutation = tf.random.shuffle(tf.range(input_length))
  indicator = ops.matmul_gather_on_zeroth_axis(
      tf.cast(indicator, tf.float32), permutation)
  labels = ops.matmul_gather_on_zeroth_axis(
      tf.cast(labels, tf.float32), permutation)

  # Index (starting from 1) when indicator is True, 0 when False.
  indicator_idx = tf.where(
      tf.cast(indicator, tf.bool), tf.range(1, input_length + 1),
      tf.zeros(input_length, tf.int32))

  # Replace -1 for negative, +1 for positive labels.
  signed_label = tf.where(
      tf.cast(labels, tf.bool), tf.ones(input_length, tf.int32),
      tf.scalar_mul(-1, tf.ones(input_length, tf.int32)))
  # Negative of index for negative label, positive index for positive label,
  # 0 when indicator is False.
  signed_indicator_idx = tf.multiply(indicator_idx, signed_label)
  sorted_signed_indicator_idx = tf.nn.top_k(
      signed_indicator_idx, input_length, sorted=True).values

  [num_positive_samples,
   num_negative_samples] = self._get_num_pos_neg_samples(
       sorted_signed_indicator_idx, batch_size)
  sampled_idx = self._get_values_from_start_and_end(
      sorted_signed_indicator_idx, num_positive_samples,
      num_negative_samples, batch_size)

  # Shift the indices to start from 0 and remove any samples that are set as
  # False.
  sampled_idx = tf.abs(sampled_idx) - tf.ones(batch_size, tf.int32)
  sampled_idx = tf.multiply(
      tf.cast(tf.greater_equal(sampled_idx, tf.constant(0)), tf.int32),
      sampled_idx)

  sampled_idx_indicator = tf.cast(
      tf.reduce_sum(
          input_tensor=tf.one_hot(sampled_idx, depth=input_length), axis=0),
      tf.bool)

  # Project back the order based on stored permutations.
  reprojections = tf.one_hot(permutation, depth=input_length,
                             dtype=tf.float32)
  return tf.cast(
      tf.tensordot(
          tf.cast(sampled_idx_indicator, tf.float32),
          reprojections, axes=[0, 0]), tf.bool)
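# Numeric illustration of the signed-index trick above (hypothetical values):
# with indicator = [T, T, F, T] and labels = [T, F, -, T], the valid entries
# get 1-based indices [1, 2, 0, 4]; applying the label signs gives
# [1, -2, 0, 4], and top_k sorting yields [4, 1, 0, -2]. Positives cluster at
# the front and negatives (as negative values) at the back, so a fixed number
# of each can be sliced from the two ends of one sorted vector.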