def update_state(self, inputs, outputs):
  """Function that updates the metric state at each example.

  For each class in `self.class_range`, the detections of that class are
  sorted by descending score and matched against the ground-truth instance
  masks of the same class. A detection is flagged as a true positive when its
  highest-IoU ground-truth instance overlaps it by more than
  `self.iou_threshold` and that instance has not already been claimed by a
  higher-scoring detection. True-positive flags, scores and ground-truth
  counts are accumulated per class in `self.tp`, `self.scores` and
  `self.num_gt`.

  Args:
    inputs: A dictionary containing input tensors.
    outputs: A dictionary containing output tensors.

  Returns:
    Update op.
  """
  detections_score = tf.reshape(
      outputs[standard_fields.DetectionResultFields.objects_score], [-1])
  detections_class = tf.reshape(
      outputs[standard_fields.DetectionResultFields.objects_class], [-1])
  num_detections = tf.shape(detections_score)[0]
  detections_instance_mask = tf.reshape(
      outputs[
          standard_fields.DetectionResultFields.instance_segments_voxel_mask],
      [num_detections, -1])
  gt_class = tf.reshape(inputs[standard_fields.InputDataFields.objects_class],
                        [-1])
  num_gt = tf.shape(gt_class)[0]
  gt_voxel_instance_ids = tf.reshape(
      inputs[standard_fields.InputDataFields.object_instance_id_voxels], [-1])
  # One row per ground-truth instance after the transpose. The one-hot index
  # is `id - 1`, so instance ids are presumably 1-based with 0 meaning "no
  # instance" -- TODO confirm against the data pipeline.
  gt_instance_masks = tf.transpose(
      tf.one_hot(gt_voxel_instance_ids - 1, depth=num_gt, dtype=tf.float32))
  for c in self.class_range:
    gt_mask_c = tf.equal(gt_class, c)
    num_gt_c = tf.math.reduce_sum(tf.cast(gt_mask_c, dtype=tf.int32))
    # Count ground truth before the early exit below: an example without any
    # detection of this class still contributes missed objects to recall.
    self.num_gt[c] += num_gt_c
    gt_instance_masks_c = tf.boolean_mask(gt_instance_masks, gt_mask_c)
    detections_mask_c = tf.equal(detections_class, c)
    num_detections_c = tf.math.reduce_sum(
        tf.cast(detections_mask_c, dtype=tf.int32))
    if num_detections_c == 0:
      continue
    det_scores_c = tf.boolean_mask(detections_score, detections_mask_c)
    det_instance_mask_c = tf.boolean_mask(detections_instance_mask,
                                          detections_mask_c)
    # top_k with k == number of detections is a full descending sort.
    det_scores_c, sorted_indices = tf.math.top_k(
        det_scores_c, k=num_detections_c)
    det_instance_mask_c = tf.gather(det_instance_mask_c, sorted_indices)
    tp_c = tf.zeros([num_detections_c], dtype=tf.int32)
    if num_gt_c > 0:
      ious_c = instance_segmentation_utils.points_mask_iou(
          masks1=gt_instance_masks_c, masks2=det_instance_mask_c)
      max_overlap_gt_ids = tf.cast(
          tf.math.argmax(ious_c, axis=0), dtype=tf.int32)
      is_gt_box_detected = tf.zeros([num_gt_c], dtype=tf.int32)
      # Greedy matching in score order: each ground-truth instance can be
      # claimed by at most one detection.
      for i in tf.range(num_detections_c):
        gt_id = max_overlap_gt_ids[i]
        if (ious_c[gt_id, i] > self.iou_threshold and
            is_gt_box_detected[gt_id] == 0):
          tp_c = tf.maximum(
              tf.one_hot(i, num_detections_c, dtype=tf.int32), tp_c)
          is_gt_box_detected = tf.maximum(
              tf.one_hot(gt_id, num_gt_c, dtype=tf.int32),
              is_gt_box_detected)
    self.tp[c] = tf.concat([self.tp[c], tp_c], axis=0)
    self.scores[c] = tf.concat([self.scores[c], det_scores_c], axis=0)
  return tf.no_op()
def grad_fn(dy):
  """Accumulates input gradients one query at a time in a `tf.while_loop`.

  Args:
    dy: Upstream gradient. The column slice `dy[:, idx:idx + 1]` is used as
      `grad_ys` for query `idx`.

  Returns:
    A tuple of gradients for the query queries, query values, support keys and
    support values, followed by a trailing `None`.
  """
  keys_alias = tf.identity(support_keys)
  values_alias = tf.identity(support_values)

  def _empty_rows_like(reference):
    # A zero row shaped like one entry of `reference`, sliced to its first
    # `zero_dim` rows; per-query gradients get concatenated onto it.
    one_row = tf.zeros(tf.shape(reference)[1:], dtype=dy.dtype)
    return one_row[tf.newaxis, :][:zero_dim]

  def _has_more_queries(idx, *unused_accumulators):
    return idx < tf.shape(query_queries)[0]

  def _accumulate_one_query(idx, queries_acc, values_acc, keys_acc,
                            support_values_acc):
    """Compute gradients for a single query."""
    single_query = query_queries[idx:idx + 1]
    single_value = query_values[idx:idx + 1]
    dist = self._get_dist(single_query, single_value, keys_alias,
                          values_alias, labels)
    d_query, d_value, d_keys, d_support_values = tf.gradients(
        dist, [single_query, single_value, keys_alias, values_alias],
        grad_ys=dy[:, idx:idx + 1])
    # Query gradients are stacked per query; support gradients are summed
    # because every query reads the same support tensors.
    return (idx + 1,
            tf.concat([queries_acc, d_query], axis=0),
            tf.concat([values_acc, d_value], axis=0),
            keys_acc + d_keys,
            support_values_acc + d_support_values)

  loop_state = (
      0,
      _empty_rows_like(query_queries),
      _empty_rows_like(query_values),
      tf.zeros(tf.shape(keys_alias), dtype=dy.dtype),
      tf.zeros(tf.shape(values_alias), dtype=dy.dtype),
  )
  final_state = tf.while_loop(
      _has_more_queries,
      _accumulate_one_query,
      loop_state,
      parallel_iterations=1)
  return final_state[1:] + (None,)
def build_task_parameters(self):
  """Creates the zero-initialized location and log-scale variables.

  Sets `self.locs` and `self.log_scales` to lists holding one variable per
  component (`self.num_components` each), created from
  `tf.zeros(self.num_dims)` and named `loc_<i>` / `log_scale_<i>`.
  """

  def _zero_variables(prefix):
    # One zero-initialized variable per component, named '<prefix>_<index>'.
    created = []
    for index in range(self.num_components):
      created.append(
          tf.Variable(
              tf.zeros(self.num_dims), name='{}_{}'.format(prefix, index)))
    return created

  self.locs = _zero_variables('loc')
  self.log_scales = _zero_variables('log_scale')
def ctrl_rewards(states,
                 actions,
                 rewards,
                 next_states,
                 contexts,
                 reward_scales=1.0):
  """Computes the negative control cost as the reward.

  The reward is minus the sum of squared action components, multiplied by
  `reward_scales`. When `actions` is None the reward is all zeros, with one
  entry per row of `next_states`.

  Args:
    states: A [batch_size, num_state_dims] Tensor representing a batch
      of states. Unused.
    actions: A [batch_size, num_action_dims] Tensor representing a batch
      of actions, or None.
    rewards: A [batch_size] Tensor representing a batch of rewards. Unused.
    next_states: A [batch_size, num_state_dims] Tensor representing a batch
      of next states. Only its leading static dimension is read, and only
      when `actions` is None.
    contexts: A list of [batch_size, num_context_dims] Tensor representing
      a batch of contexts. Unused.
    reward_scales: multiplicative scale for rewards. A scalar or 1D tensor,
      must be broadcastable to number of reward dimensions.

  Returns:
    A new tf.float32 [batch_size] rewards Tensor, and
      tf.float32 [batch_size] discounts tensor (all ones).
  """
  del states, rewards, contexts  # Unused
  if actions is not None:
    squared_action_norm = tf.reduce_sum(tf.square(actions), axis=1)
    scaled_cost = -squared_action_norm
    scaled_cost *= reward_scales
    new_rewards = tf.to_float(scaled_cost)
  else:
    new_rewards = tf.to_float(tf.zeros(shape=next_states.shape[:1]))
  discounts = tf.ones_like(new_rewards)
  return new_rewards, discounts
def rollout(self, max_num_steps, time_step, policy_state):
    """Runs the rollout loop and summarizes what it collected.

    The loop body `self._rollout_loop_body` is executed for
    `ceil(max_num_steps / self._env.batch_size)` iterations without
    back-propagation. The training info gathered in the tensor arrays is
    stacked, converted back to distributions and passed to the algorithm's
    summary hooks.

    Args:
        max_num_steps: step budget used to derive the number of iterations.
        time_step: initial time step loop variable.
        policy_state: initial policy state loop variable.

    Returns:
        The `(time_step, policy_state)` pair left by the last loop iteration.
    """
    step_counter = tf.zeros((), tf.int32)
    env_batch_size = self._env.batch_size
    num_iterations = math.ceil(max_num_steps / env_batch_size)

    def _make_tensor_array(spec):
        # One slot per loop iteration; every element carries a leading
        # batch dimension in front of the spec's own shape.
        element_shape = tf.TensorShape([env_batch_size]).concatenate(
            spec.shape)
        return tf.TensorArray(
            dtype=spec.dtype,
            size=num_iterations,
            element_shape=element_shape)

    rollout_info_spec = nest_utils.to_distribution_param_spec(
        self._training_info_spec.rollout_info)
    tensor_array_spec = self._training_info_spec._replace(
        rollout_info=rollout_info_spec)
    training_info_ta = tf.nest.map_structure(_make_tensor_array,
                                             tensor_array_spec)

    loop_vars = [step_counter, time_step, policy_state, training_info_ta]
    [step_counter, time_step, policy_state,
     training_info_ta] = tf.while_loop(
         cond=lambda *_: True,
         body=self._rollout_loop_body,
         loop_vars=loop_vars,
         maximum_iterations=num_iterations,
         back_prop=False,
         name="rollout_loop")

    stacked_info = tf.nest.map_structure(lambda ta: ta.stack(),
                                         training_info_ta)
    training_info = nest_utils.params_to_distributions(
        stacked_info, self._training_info_spec)

    self._algorithm.summarize_rollout(training_info)
    self._algorithm.summarize_metrics()

    return time_step, policy_state
def components(self):
  """A list of tfd.Distributions constructed on-the-fly from task params.

  Each of the `self.num_components` entries is a standard-normal base
  distribution pushed through a chain of RealNVP coupling layers, with a
  unit-order reversal inserted between consecutive layers.
  """

  def _make_shift_and_log_scale_fn(shift_and_log_scale_model, num_masked):
    """Adapts a model to the `shift_and_log_scale_fn` interface of RealNVP.

    RealNVP passes an `output_dims` argument so the function can create its
    variables lazily. The value is already known here
    (`num_dims - num_masked`), so it is only checked.

    Args:
      shift_and_log_scale_model: A Keras model that computes the fprop.
      num_masked: int, number of masked unit.

    Returns:
      A function that computes shift and log-scale coefficients.
    """

    def _shift_and_log_scale_fn(x, output_dims, *unused_args,
                                **unused_kwargs):
      expected_dims = self.num_dims - num_masked
      if output_dims != expected_dims:
        raise ValueError('Expected {} output_dims, got {}.'.format(
            expected_dims, output_dims))
      # The model output is split in half along the last axis.
      return tf.split(shift_and_log_scale_model(x), 2, axis=-1)

    return _shift_and_log_scale_fn

  def _coupling_layers(component_index):
    # Bijector list for one component, in the order handed to `tfb.Chain`.
    layers = []
    models = getattr(self, self.task_attribute_name)[component_index]
    for layer_index, model in enumerate(models):
      num_masked = self._compute_num_masked(layer_index)
      coupling = tfb.RealNVP(
          num_masked=num_masked,
          shift_and_log_scale_fn=_make_shift_and_log_scale_fn(
              model, num_masked))
      if layer_index > 0:
        # Reverse the unit order between coupling layers so that they can be
        # chained.
        layers.append(tfb.Permute(list(reversed(range(self.num_dims)))))
      layers.append(coupling)
    return layers

  per_component_layers = []
  for component_index in range(self.num_components):
    per_component_layers.append(_coupling_layers(component_index))

  distributions = []
  for layers in per_component_layers:
    base = tfd.MultivariateNormalDiag(tf.zeros([self.num_dims]))
    distributions.append(
        tfd.TransformedDistribution(
            distribution=base, bijector=tfb.Chain(layers)))
  return distributions
def per_voxel_point_sample_segment_func(data, segment_ids, num_segments,
                                        num_samples_per_voxel):
  """Samples features from the points within each voxel.

  Point indices are tracked 1-based so that 0 can mean "no point". Each loop
  iteration picks, for every segment, the largest remaining point index and
  then removes the picked points from the candidate pool.

  Args:
    data: A tf.float32 tensor of size [N, F]. F must be statically known.
    segment_ids: A tf.int32 tensor of size [N].
    num_segments: Number of segments.
    num_samples_per_voxel: Number of features to sample per voxel. If the voxel
      has less number of points in it, the point features will be padded by 0.

  Returns:
    A tf.float32 tensor of size [num_segments, num_samples_per_voxel, F]. The
    sampled point indices are used internally only and are not returned.

  Raises:
    ValueError: If the static size of the second dimension of `data` is None.
  """
  num_channels = data.get_shape().as_list()[1]
  if num_channels is None:
    raise ValueError('num_channels is None.')
  n = tf.shape(segment_ids)[0]

  def _body_fn(i, indices_range, indices):
    """Computes the indices of the i-th point feature in each segment."""
    # Largest remaining 1-based point index per segment.
    indices_i = tf.math.unsorted_segment_max(data=indices_range,
                                             segment_ids=segment_ids,
                                             num_segments=num_segments)
    # Only strictly positive entries refer to a real point.
    indices_i_positive_mask = tf.greater(indices_i, 0)
    indices_i_positive = tf.boolean_mask(indices_i, indices_i_positive_mask)
    # Mark the points picked in this round (converted back to 0-based) ...
    boolean_mask = tf.scatter_nd(indices=tf.cast(tf.expand_dims(
        indices_i_positive - 1, axis=1),
                                                 dtype=tf.int64),
                                 updates=tf.ones_like(indices_i_positive,
                                                      dtype=tf.int32),
                                 shape=(n, ))
    # ... and zero them out so later rounds cannot pick them again.
    indices_range *= (1 - boolean_mask)
    # Non-positive results are reset to 0, the "no point" index.
    indices_i *= tf.cast(indices_i_positive_mask, dtype=tf.int32)
    # Place this round's picks into column i of the output index table.
    indices_i = tf.pad(tf.expand_dims(indices_i, axis=1),
                       paddings=[[0, 0], [i, num_samples_per_voxel - i - 1]])
    indices += indices_i
    i = i + 1
    return i, indices_range, indices

  cond = lambda i, indices_range, indices: i < num_samples_per_voxel

  (_, _, indices) = tf.while_loop(
      cond=cond,
      body=_body_fn,
      loop_vars=(tf.constant(0, dtype=tf.int32), tf.range(n) + 1,
                 tf.zeros([num_segments, num_samples_per_voxel],
                          dtype=tf.int32)))

  # Prepend a zero row so that index 0 ("no point") gathers all-zero features.
  data = tf.pad(data, paddings=[[1, 0], [0, 0]])
  voxel_features = tf.gather(data, tf.reshape(indices, [-1]))
  return tf.reshape(voxel_features,
                    [num_segments, num_samples_per_voxel, num_channels])
def get_fc_vars_copy_ops(fc_weights, fc_bias, make_copies):
  """Returns the classifier layer variables, or local copies of them.

  When `make_copies` is True, a zero-initialized local variable is created for
  each of `fc_weights` and `fc_bias`; these copies are returned in place of
  the originals, together with the ops that assign the original values to
  them.

  Args:
    fc_weights: A Variable for the weights of the fc layer.
    fc_bias: A Variable for the bias of the fc layer.
    make_copies: A bool. Whether to copy the given variables. If not, those
      variables themselves are returned.

  Returns:
    fc_weights: A Variable for the weights of the fc layer. Might be the same as
      the input fc_weights or a copy of it.
    fc_bias: Analogously, a Variable for the bias of the fc layer.
    fc_vars_copy_ops: A (possibly empty) list of operations for assigning the
      value of each of fc_weights and fc_bias to a respective copy variable.
  """
  if not make_copies:
    return fc_weights, fc_bias, []

  def _local_zero_copy(variable):
    # A zero-initialized local variable with the source's static shape, plus
    # the op that assigns the source's value to it.
    duplicate = tf.Variable(
        tf.zeros(variable.shape.as_list()),
        collections=[tf.GraphKeys.LOCAL_VARIABLES])
    return duplicate, tf.assign(duplicate, variable)

  with tf.variable_scope('weight_copy'):
    weights_copy, weights_copy_op = _local_zero_copy(fc_weights)
    bias_copy, bias_copy_op = _local_zero_copy(fc_bias)

  return weights_copy, bias_copy, [weights_copy_op, bias_copy_op]
def _iter(self, time_step, policy_state):
    """One training iteration.

    Runs `self._train_loop_body` for `self._train_interval` steps under a
    gradient tape, stacks the collected training info, lets the algorithm
    complete the training step, writes summaries and increments the global
    counter.

    Args:
        time_step: time step fed to the first loop iteration.
        policy_state: policy state fed to the first loop iteration.

    Returns:
        A list `[next_time_step, next_state]` holding the time step and
        policy state left by the last loop iteration.
    """
    counter = tf.zeros((), tf.int32)
    batch_size = self._env.batch_size

    def create_ta(s):
        # One slot per training step; each element carries a leading batch
        # dimension in front of the spec's own shape.
        return tf.TensorArray(dtype=s.dtype,
                              size=self._train_interval,
                              element_shape=tf.TensorShape(
                                  [batch_size]).concatenate(s.shape))

    # The `info` field is stored as distribution parameters and turned back
    # into distributions after stacking.
    training_info_ta = tf.nest.map_structure(
        create_ta,
        self._training_info_spec._replace(
            info=nest_utils.to_distribution_param_spec(
                self._training_info_spec.info)))

    # Only the explicitly watched trainable variables are tracked. The tape
    # is persistent because it is handed to `train_complete` below.
    with tf.GradientTape(watch_accessed_variables=False,
                         persistent=True) as tape:
        tape.watch(self._trainable_variables)
        # `cond` is always True; the loop stops via `maximum_iterations`.
        [counter, next_time_step, next_state, training_info_ta
         ] = tf.while_loop(cond=lambda *_: True,
                           body=self._train_loop_body,
                           loop_vars=[
                               counter, time_step, policy_state,
                               training_info_ta
                           ],
                           back_prop=True,
                           parallel_iterations=1,
                           maximum_iterations=self._train_interval,
                           name='iter_loop')

        training_info = tf.nest.map_structure(lambda ta: ta.stack(),
                                              training_info_ta)

        training_info = nest_utils.params_to_distributions(
            training_info, self._training_info_spec)

    loss_info, grads_and_vars = self._algorithm.train_complete(
        tape, training_info)

    # Drop the reference to the persistent tape once it is no longer needed.
    del tape

    self._algorithm.summarize_train(training_info, loss_info,
                                    grads_and_vars)
    self._algorithm.summarize_metrics()

    common.get_global_counter().assign_add(1)

    return [next_time_step, next_state]
def tf_random_choice(inputs, n_samples):
    """Draws rows of `inputs` uniformly at random, with replacement.

    Params:
        inputs (Tensor): Shape [n_states, n_features]
        n_samples (int): The number of random samples to take.
    Returns:
        sampled_inputs (Tensor): Shape [n_samples, n_features]
    """
    n_states = tf.shape(inputs)[0]
    # Identical (zero) logits give every row the same probability. A leading
    # axis is added because multinomial requires 2D logits: (1, n_states).
    uniform_logits = tf.expand_dims(tf.zeros(n_states), axis=0)
    draws = tf.multinomial(uniform_logits, n_samples)
    # Drop the leading axis again: (n_samples,)
    row_ids = tf.squeeze(draws, axis=0, name="random_choice_ind")
    return tf.gather(inputs, row_ids, name="random_choice")
def identity_knn_graph_unbatched(points, k):
  """Builds a trivial k-NN graph in which every point is its own neighbor.

  Args:
    points: A tf.float32 tensor of [N, D] where D is the point dimensions.
    k: Number of neighbors for each point.

  Returns:
    distances: A tf.float32 tensor of [N, k]. Distances is all zeros since
      each point is returned as its own neighbor.
    indices: A tf.int32 tensor of [N, k]. Each row will contain values that
      are identical to the index of that row.
  """
  num_points = tf.shape(points)[0]
  distances = tf.zeros([num_points, k], dtype=tf.float32)
  # Column vector of row ids, repeated k times along the neighbor axis.
  row_ids = tf.expand_dims(tf.range(num_points), axis=1)
  indices = tf.tile(row_ids, [1, k])
  return distances, indices
def _calc_cost_for_action_sequence(self, time_step: ActionTimeStep, state,
                                   ac_seqs):
    """Accumulates the cost of rolling out each candidate action sequence.

    Every step of the planning horizon is simulated with
    `self._dynamics_func`, and the negated output of `self._reward_func` is
    summed over the horizon.

    Args:
        time_step (ActionTimeStep): input data for next step prediction
        state (MbrlState): input state for next step prediction
        ac_seqs: action_sequence (tf.Tensor) of shape [batch_size,
                population_size, solution_dim]), where
                solution_dim = planning_horizon * num_actions
    Returns:
        cost (tf.Tensor) with shape [batch_size, population_size]
    """
    obs = time_step.observation
    batch_size = obs.shape[0]

    # Make the planning step the leading axis and merge batch and population
    # into one axis: [planning_horizon, batch * population, num_actions].
    ac_seqs = tf.reshape(
        ac_seqs,
        [batch_size, self._population_size, self._planning_horizon, -1])
    ac_seqs = tf.reshape(
        tf.transpose(ac_seqs, [2, 0, 1, 3]),
        [self._planning_horizon, -1, self._num_actions])

    # Seed the dynamics feature with the current observation, then expand
    # every state tensor to the population.
    state = state._replace(dynamics=state.dynamics._replace(feature=obs))
    state = tf.nest.map_structure(self._expand_to_population, state)

    cost = 0
    for i in range(ac_seqs.shape[0]):
        action = ac_seqs[i]
        time_step = time_step._replace(prev_action=action)
        time_step, state = self._dynamics_func(time_step, state)
        next_obs = time_step.observation
        # Note: currently using (next_obs, action), might need to
        # consider (obs, action) in order to be more compatible
        # with the conventional definition of reward function
        reward_step = self._reward_func(next_obs, action)
        cost = cost - reward_step

    # reshape cost back to [batch size, population_size]
    cost = tf.reshape(cost, [batch_size, -1])
    return cost
def fwd_fn(query_queries_fwd, query_values_fwd, support_keys_fwd,
           support_values_fwd, labels_fwd):
  """CrossTransformer forward, using a while loop to save memory.

  Queries are processed one at a time and the per-query outputs of
  `self._get_dist` are concatenated along axis 1.

  Args:
    query_queries_fwd: Query-side queries; sliced one entry at a time along
      the first axis.
    query_values_fwd: Query-side values; sliced in step with
      `query_queries_fwd`.
    support_keys_fwd: Support-side keys, passed unchanged to every call.
    support_values_fwd: Support-side values, passed unchanged to every call.
    labels_fwd: Support labels. `reduce_max(labels_fwd) + 1` sets the number
      of rows of the result.

  Returns:
    The concatenated outputs, with one column block per query.
  """
  # Size the accumulator from this function's own `labels_fwd` argument, the
  # same tensor that is handed to `_get_dist` below, so the row counts of the
  # concatenated pieces cannot disagree.
  initial = (0,
             tf.zeros([tf.reduce_max(labels_fwd) + 1, zero_dim],
                      dtype=query_queries_fwd.dtype))

  def loop_body(idx, dist):
    """Appends the output for query `idx` to the accumulated result."""
    dist_new = self._get_dist(query_queries_fwd[idx:idx + 1],
                              query_values_fwd[idx:idx + 1],
                              support_keys_fwd, support_values_fwd,
                              labels_fwd)
    dist = tf.concat([dist, dist_new], axis=1)
    return (idx + 1, dist)

  _, res = tf.while_loop(
      lambda x, _: x < tf.shape(query_queries_fwd)[0],
      loop_body,
      initial,
      parallel_iterations=1)
  return res
def get_embeddings_vars_copy_ops(embedding_vars_dict, make_copies):
  """Returns the embedding variables, or local copies of them.

  This is useful at meta-test time for MAML and the finetuning baseline. At
  meta-test time the model's variables should not be updated permanently;
  changes should only persist within the given episode. Creating a copy of
  each variable, and modifying and using the copies instead of the originals,
  achieves that.

  Args:
    embedding_vars_dict: A dict mapping each variable name to the corresponding
      Variable.
    make_copies: A bool. Whether to copy the given variables. If not, those
      variables themselves will be returned. Typically, this is True at meta-
      test time and False at meta-training time.

  Returns:
    embedding_vars_keys: A list of variable names.
    embeddings_vars: A corresponding list of Variables.
    embedding_vars_copy_ops: A (possibly empty) list of operations, each of
      which assigns the value of one of the provided Variables to a new
      Variable which is its copy.
  """
  names = []
  variables = []
  copy_ops = []
  for name, var in six.iteritems(embedding_vars_dict):
    names.append(name)
    if not make_copies:
      variables.append(var)
      continue
    with tf.variable_scope('weight_copy'):
      # Zero-initialized local variable with the source's static shape; the
      # assign op fills it with the source's value when run.
      duplicate = tf.Variable(
          tf.zeros(var.shape.as_list()),
          collections=[tf.GraphKeys.LOCAL_VARIABLES])
      copy_ops.append(tf.assign(duplicate, var))
    variables.append(duplicate)
  return names, variables, copy_ops
def identity_knn_graph(points, num_valid_points, k):  # pylint: disable=unused-argument
  """Returns each points as its own neighbor k times.

  Args:
    points: A tf.float32 tensor of size [num_batches, N, D] where D is the point
      dimensions. The batch size may be static or dynamic.
    num_valid_points: A tf.int32 tensor of size [num_batches] containing the
      number of valid points in each batch example. Unused.
    k: Number of neighbors for each point.

  Returns:
    distances: A tf.float32 tensor of [num_batches, N, k]. Distances is all
      zeros since each point is returned as its own neighbor.
    indices: A tf.int32 tensor of [num_batches, N, k]. Each row will contain
      values that are identical to the index of that row.
  """
  # Prefer the static batch size so static shapes stay known; fall back to
  # the dynamic one when the batch dimension is not known at graph
  # construction time.
  num_batches = tf.compat.dimension_value(points.get_shape()[0])
  if num_batches is None:
    num_batches = tf.shape(points)[0]
  num_points = tf.shape(points)[1]
  indices = tf.expand_dims(tf.range(num_points), axis=1)
  indices = tf.tile(tf.expand_dims(indices, axis=0), [num_batches, 1, k])
  distances = tf.zeros([num_batches, num_points, k], dtype=tf.float32)
  return distances, indices
def _iter(self, time_step, policy_state):
    """One training iteration.

    Runs `self._train_loop_body` for `self._train_interval` steps under a
    gradient tape, appends one extra "final" training-info entry computed
    after the loop, then lets the algorithm complete the training step.

    Args:
        time_step: time step fed to the first loop iteration.
        policy_state: policy state fed to the first loop iteration.

    Returns:
        A tuple `(next_time_step, next_state)` to continue from.
    """
    counter = tf.zeros((), tf.int32)
    batch_size = self._env.batch_size

    def create_ta(s):
        # `_train_interval + 1` slots: one per loop step plus the final
        # entry written after the loop.
        return tf.TensorArray(dtype=s.dtype,
                              size=self._train_interval + 1,
                              element_shape=tf.TensorShape(
                                  [batch_size]).concatenate(s.shape))

    training_info_ta = tf.nest.map_structure(create_ta,
                                             self._training_info_spec)

    # Only the explicitly watched trainable variables are tracked. The tape
    # is persistent because it is re-entered below and then handed to
    # `train_complete`.
    with tf.GradientTape(watch_accessed_variables=False,
                         persistent=True) as tape:
        tape.watch(self._trainable_variables)
        # `cond` is always True; the loop stops via `maximum_iterations`.
        [counter, time_step, policy_state, training_info_ta
         ] = tf.while_loop(cond=lambda *_: True,
                           body=self._train_loop_body,
                           loop_vars=[
                               counter, time_step, policy_state,
                               training_info_ta
                           ],
                           back_prop=True,
                           parallel_iterations=1,
                           maximum_iterations=self._train_interval,
                           name='iter_loop')

    if self._final_step_mode == OnPolicyDriver.FINAL_STEP_SKIP:
        # Take one more step via `self._step` and continue from its result.
        next_time_step, policy_step, action = self._step(
            time_step, policy_state)
        next_state = policy_step.state
    else:
        # Evaluate the algorithm on the last time step only; the returned
        # time step and policy state are the ones left by the loop.
        policy_step = common.algorithm_step(self._algorithm.rollout,
                                            self._observation_transformer,
                                            time_step, policy_state)
        action = common.sample_action_distribution(policy_step.action)
        next_time_step = time_step
        next_state = policy_state

    action_distribution_param = common.get_distribution_params(
        policy_step.action)

    final_training_info = make_training_info(
        action_distribution=action_distribution_param,
        action=action,
        reward=time_step.reward,
        discount=time_step.discount,
        step_type=time_step.step_type,
        info=policy_step.info)

    # Re-enter the persistent tape to append the final entry at index
    # `counter` and to rebuild the stacked training info.
    with tape:
        training_info_ta = tf.nest.map_structure(
            lambda ta, x: ta.write(counter, x), training_info_ta,
            final_training_info)
        training_info = tf.nest.map_structure(lambda ta: ta.stack(),
                                              training_info_ta)

        # The action distribution is stored as parameters; rebuild the
        # distribution objects from the algorithm's spec.
        action_distribution = nested_distributions_from_specs(
            self._algorithm.action_distribution_spec,
            training_info.action_distribution)

        training_info = training_info._replace(
            action_distribution=action_distribution)

    loss_info, grads_and_vars = self._algorithm.train_complete(
        tape, training_info)

    # Drop the reference to the persistent tape once it is no longer needed.
    del tape

    self._training_summary(training_info, loss_info, grads_and_vars)

    self._train_step_counter.assign_add(1)

    return next_time_step, next_state
def _create_ou_process(action_spec):
  """Creates a `tfa_common.OUProcess` for one action spec.

  The first positional argument is a callable that returns a zero tensor with
  the shape and dtype of `action_spec`. Damping, stddev and the seed stream
  come from the enclosing scope.
  """

  def _zeros_like_spec():
    return tf.zeros(action_spec.shape, dtype=action_spec.dtype)

  process_seed = seed_stream()
  return tfa_common.OUProcess(
      _zeros_like_spec, ou_damping, ou_stddev, seed=process_seed)
def linear_classifier(embeddings, num_classes, cosine_classifier,
                      cosine_logits_multiplier, use_weight_norm, weight_decay):
  """Forward pass through a linear classifier, or possibly a cosine classifier.

  With `use_weight_norm`, the weight matrix is rescaled per class by `g`, and
  `g` (plus the bias, if there is one) is initialized from the statistics of
  the first batch of embeddings that passes through the layer.

  Args:
    embeddings: A Tensor of size [batch size, embedding dim].
    num_classes: An integer; the dimension of the classification.
    cosine_classifier: A bool. If true, a cosine classifier is used, which does
      not require a bias.
    cosine_logits_multiplier: A float. Only used if cosine_classifier is True,
      and multiplies the resulting logits.
    use_weight_norm: A bool. Whether weight norm was used. If so, then if using
      cosine classifier, normalize only the embeddings but not the weights.
    weight_decay: A float; the scalar multiple on the L2 regularization of the
      weight matrix. Only read when `use_weight_norm` is False.

  Returns:
    logits: A Tensor of size [batch size, num outputs].
  """

  embedding_dims = embeddings.get_shape().as_list()[-1]

  if use_weight_norm:
    # A variable to keep track of whether the initialization has already
    # happened.
    data_dependent_init_done = tf.get_variable('data_dependent_init_done',
                                               initializer=0,
                                               dtype=tf.int32,
                                               trainable=False)

    w_fc = tf.get_variable('w_fc', [embedding_dims, num_classes],
                           initializer=tf.random_normal_initializer(
                               0, 0.05),
                           trainable=True)
    # This init is temporary as it needs to be done in a data-dependent way.
    # It will be overwritten during the first forward pass through this layer.
    g = tf.get_variable('g',
                        dtype=tf.float32,
                        initializer=tf.ones([num_classes]),
                        trainable=True)
    b_fc = None
    if not cosine_classifier:
      # Also initialize a bias.
      b_fc = tf.get_variable('b_fc',
                             initializer=tf.zeros([num_classes]),
                             trainable=True)

    def _do_data_dependent_init():
      """Returns ops for the data-dependent init of g and maybe b_fc."""
      # Statistics of the layer output under column-normalized weights.
      w_fc_normalized = tf.nn.l2_normalize(w_fc.read_value(), [0])
      output_init = tf.matmul(embeddings, w_fc_normalized)
      mean_init, var_init = tf.nn.moments(output_init, [0])
      # Data-dependent init values. The 1e-10 keeps the square root away from
      # zero.
      g_init_value = 1. / tf.sqrt(var_init + 1e-10)
      ops = [tf.assign(g, g_init_value)]
      if not cosine_classifier:
        # Also initialize a bias in a data-dependent way.
        b_fc_init_value = -mean_init * g_init_value
        ops.append(tf.assign(b_fc, b_fc_init_value))
      # Mark that the data-dependent initialization is done to prevent it from
      # happening again in the future.
      ops.append(tf.assign(data_dependent_init_done, 1))
      return tf.group(*ops)

    # Possibly perform data-dependent init (if it hasn't been done already).
    init_op = tf.cond(tf.equal(data_dependent_init_done, 0),
                      _do_data_dependent_init, tf.no_op)

    # The forward pass must run after the (possible) init above.
    with tf.control_dependencies([init_op]):
      # Apply weight normalization.
      w_fc *= g / tf.sqrt(tf.reduce_sum(tf.square(w_fc), [0]))
      # Forward pass through the layer defined by w_fc and b_fc.
      logits = linear_classifier_forward_pass(embeddings, w_fc, b_fc,
                                              cosine_classifier,
                                              cosine_logits_multiplier, True)
  else:
    # No weight norm.
    w_fc = functional_backbones.weight_variable(
        [embedding_dims, num_classes], weight_decay=weight_decay)
    b_fc = None
    if not cosine_classifier:
      # Also initialize a bias.
      b_fc = functional_backbones.bias_variable([num_classes])
    # Forward pass through the layer defined by w_fc and b_fc.
    logits = linear_classifier_forward_pass(embeddings, w_fc, b_fc,
                                            cosine_classifier,
                                            cosine_logits_multiplier, False)
  return logits
def update_state(self, inputs, outputs):
  """Function that updates the metric state at each example.

  For each class in `self.class_range`, the detected boxes of that class are
  sorted by descending score and matched against the ground-truth boxes of
  the same class using 3D IoU. A detection is flagged as a true positive when
  its highest-IoU ground-truth box overlaps it by more than
  `self.iou_threshold` and that box has not already been claimed by a
  higher-scoring detection. True-positive flags, scores and ground-truth
  counts are accumulated per class in `self.tp`, `self.scores` and
  `self.num_gt`.

  Args:
    inputs: A dictionary containing input tensors.
    outputs: A dictionary containing output tensors.

  Returns:
    Update op.
  """
  detections_score = tf.reshape(
      outputs[standard_fields.DetectionResultFields.objects_score], [-1])
  detections_class = tf.reshape(
      outputs[standard_fields.DetectionResultFields.objects_class], [-1])
  detections_length = tf.reshape(
      outputs[standard_fields.DetectionResultFields.objects_length], [-1])
  detections_height = tf.reshape(
      outputs[standard_fields.DetectionResultFields.objects_height], [-1])
  detections_width = tf.reshape(
      outputs[standard_fields.DetectionResultFields.objects_width], [-1])
  detections_center = tf.reshape(
      outputs[standard_fields.DetectionResultFields.objects_center], [-1, 3])
  detections_rotation_matrix = tf.reshape(
      outputs[standard_fields.DetectionResultFields.objects_rotation_matrix],
      [-1, 3, 3])
  gt_class = tf.reshape(inputs[standard_fields.InputDataFields.objects_class],
                        [-1])
  gt_length = tf.reshape(
      inputs[standard_fields.InputDataFields.objects_length], [-1])
  gt_height = tf.reshape(
      inputs[standard_fields.InputDataFields.objects_height], [-1])
  gt_width = tf.reshape(inputs[standard_fields.InputDataFields.objects_width],
                        [-1])
  gt_center = tf.reshape(
      inputs[standard_fields.InputDataFields.objects_center], [-1, 3])
  gt_rotation_matrix = tf.reshape(
      inputs[standard_fields.InputDataFields.objects_rotation_matrix],
      [-1, 3, 3])
  for c in self.class_range:
    gt_mask_c = tf.equal(gt_class, c)
    num_gt_c = tf.math.reduce_sum(tf.cast(gt_mask_c, dtype=tf.int32))
    # Count ground truth before the early exit below: an example without any
    # detection of this class still contributes missed objects to recall.
    self.num_gt[c] += num_gt_c
    gt_length_c = tf.boolean_mask(gt_length, gt_mask_c)
    gt_height_c = tf.boolean_mask(gt_height, gt_mask_c)
    gt_width_c = tf.boolean_mask(gt_width, gt_mask_c)
    gt_center_c = tf.boolean_mask(gt_center, gt_mask_c)
    gt_rotation_matrix_c = tf.boolean_mask(gt_rotation_matrix, gt_mask_c)
    detections_mask_c = tf.equal(detections_class, c)
    num_detections_c = tf.math.reduce_sum(
        tf.cast(detections_mask_c, dtype=tf.int32))
    if num_detections_c == 0:
      continue
    det_length_c = tf.boolean_mask(detections_length, detections_mask_c)
    det_height_c = tf.boolean_mask(detections_height, detections_mask_c)
    det_width_c = tf.boolean_mask(detections_width, detections_mask_c)
    det_center_c = tf.boolean_mask(detections_center, detections_mask_c)
    det_rotation_matrix_c = tf.boolean_mask(detections_rotation_matrix,
                                            detections_mask_c)
    det_scores_c = tf.boolean_mask(detections_score, detections_mask_c)
    # top_k with k == number of detections is a full descending sort; the
    # box attributes are reordered to match.
    det_scores_c, sorted_indices = tf.math.top_k(
        det_scores_c, k=num_detections_c)
    det_length_c = tf.gather(det_length_c, sorted_indices)
    det_height_c = tf.gather(det_height_c, sorted_indices)
    det_width_c = tf.gather(det_width_c, sorted_indices)
    det_center_c = tf.gather(det_center_c, sorted_indices)
    det_rotation_matrix_c = tf.gather(det_rotation_matrix_c, sorted_indices)
    tp_c = tf.zeros([num_detections_c], dtype=tf.int32)
    if num_gt_c > 0:
      ious_c = box_ops.iou3d(
          boxes1_length=gt_length_c,
          boxes1_height=gt_height_c,
          boxes1_width=gt_width_c,
          boxes1_center=gt_center_c,
          boxes1_rotation_matrix=gt_rotation_matrix_c,
          boxes2_length=det_length_c,
          boxes2_height=det_height_c,
          boxes2_width=det_width_c,
          boxes2_center=det_center_c,
          boxes2_rotation_matrix=det_rotation_matrix_c)
      max_overlap_gt_ids = tf.cast(
          tf.math.argmax(ious_c, axis=0), dtype=tf.int32)
      is_gt_box_detected = tf.zeros([num_gt_c], dtype=tf.int32)
      # Greedy matching in score order: each ground-truth box can be claimed
      # by at most one detection.
      for i in tf.range(num_detections_c):
        gt_id = max_overlap_gt_ids[i]
        if (ious_c[gt_id, i] > self.iou_threshold and
            is_gt_box_detected[gt_id] == 0):
          tp_c = tf.maximum(
              tf.one_hot(i, num_detections_c, dtype=tf.int32), tp_c)
          is_gt_box_detected = tf.maximum(
              tf.one_hot(gt_id, num_gt_c, dtype=tf.int32),
              is_gt_box_detected)
    self.tp[c] = tf.concat([self.tp[c], tp_c], axis=0)
    self.scores[c] = tf.concat([self.scores[c], det_scores_c], axis=0)
  return tf.no_op()
def preprocess(inputs,
               output_keys=None,
               is_training=False,
               using_sequence_dataset=False,
               num_frame_to_load=1,
               transform_points_fn=None,
               image_preprocess_fn_dic=None,
               images_points_correspondence_fn=None,
               compute_semantic_labels_fn=None,
               compute_motion_labels_fn=None,
               view_names=(),
               points_key='points',
               colors_key='colors',
               normals_key='normals',
               intensities_key='intensities',
               elongations_key='elongations',
               semantic_labels_key='semantic_labels',
               motion_labels_key='motion_labels',
               spin_coords_key=None,
               points_in_image_frame_key=None,
               num_points_to_randomly_sample=None,
               x_min_degree_rotation=None,
               x_max_degree_rotation=None,
               y_min_degree_rotation=None,
               y_max_degree_rotation=None,
               z_min_degree_rotation=None,
               z_max_degree_rotation=None,
               points_pad_or_clip_size=None,
               voxels_pad_or_clip_size=None,
               voxel_grid_cell_size=(0.1, 0.1, 0.1),
               num_offset_bins_x=4,
               num_offset_bins_y=4,
               num_offset_bins_z=4,
               point_feature_keys=('point_offsets', ),
               point_to_voxel_segment_func=tf.math.unsorted_segment_mean,
               x_random_crop_size=None,
               y_random_crop_size=None,
               min_scale_ratio=None,
               max_scale_ratio=None,
               semantic_labels_offset=0,
               ignore_labels=(),
               remove_unlabeled_images_and_points=False,
               labeled_view_name=None,
               only_keep_first_return_lidar_points=False):
    """Preprocesses a dictionary of `Tensor` inputs.

    The function copies the raw per-point tensors to the
    `standard_fields.InputDataFields` keys, optionally merges the points of
    several frames, splits the inputs into point ("mesh"), image-view and
    remaining entries, and then runs the point pipeline: optional first-return
    filtering, random sampling, loss-weight creation, intensity / elongation /
    color normalization, random scaling, random cropping, mean subtraction,
    rotation and flipping, padding or clipping, and finally voxelization.

    Rotation is delegated to `rotate_randomly` with the per-axis degree ranges
    given here; flipping is delegated to
    `flip_randomly_points_and_normals_motions`, which receives `is_training`.
    Normals and motion (flow) vectors, when present, are passed through both
    helpers and the returned values are stored back.

    Note that the preprocessing affects all values of `inputs` that are
    `Tensors` (float64 tensors are cast to float32 and int64 tensors to
    int32).

    Args:
      inputs: A dictionary of inputs. Each value must be a `Tensor`. When
        `using_sequence_dataset` is True the dictionary is expected to hold
        the keys 'scene', 'frame0', 'frame1' and 'frame_start_index' (plus
        'frame<i>' for every additional frame that is loaded) and 'frame0' is
        used as the actual inputs.
      output_keys: Either None, or a list of strings containing the keys in
        the dictionary that is returned by the preprocess function.
      is_training: Whether we're training or testing.
      using_sequence_dataset: If true, the inputs will contain scene and
        multiple frames data.
      num_frame_to_load: If greater than 1, load multiframe point cloud point
        positions and its correspondence.
      transform_points_fn: Fn to transform other frames to a specific frame's
        coordinate. Called as `transform_points_fn(scene, points,
        frame_start_index, frame_start_index + i)`.
      image_preprocess_fn_dic: Image preprocessing function. Maps view names
        to their image preprocessing functions. Set it to None, if there are
        no images to preprocess or you are not interested in preprocessing
        images.
      images_points_correspondence_fn: The function that computes
        correspondence between images and points.
      compute_semantic_labels_fn: If not None, semantic labels will be
        computed using this function.
      compute_motion_labels_fn: If not None, motion labels will be computed
        using this function.
      view_names: Names corresponding to 2d views of the scene.
      points_key: The key used for `points` in the inputs.
      colors_key: The key used for `colors` in the inputs.
      normals_key: The key used for 'normals' in the inputs.
      intensities_key: The key used for 'intensities' in the inputs.
      elongations_key: The key used for 'elongations' in the inputs.
      semantic_labels_key: The key used for 'semantic_labels' in the inputs.
      motion_labels_key: The key used for 'motion_labels' in the inputs.
      spin_coords_key: The key used for 'spin_coords' in the inputs. In Waymo
        data, spin_coords is a [num_points, 3] tensor that contains
        scan_index, shot_index, return_index. In Waymo data, return_index of
        the first return points is 0.
      points_in_image_frame_key: A string that identifies the tensor that
        contains the points_in_image_frame tensor. If None, it won't be used.
      num_points_to_randomly_sample: Number of points to randomly sample. If
        None, it will keep the original points and does not perform sampling.
      x_min_degree_rotation: Min degree of rotation around the x axis.
      x_max_degree_rotation: Max degree of rotation around the x axis.
      y_min_degree_rotation: Min degree of rotation around the y axis.
      y_max_degree_rotation: Max degree of rotation around the y axis.
      z_min_degree_rotation: Min degree of rotation around the z axis.
      z_max_degree_rotation: Max degree of rotation around the z axis.
      points_pad_or_clip_size: Number of target points to pad or clip to. If
        None, it will not perform the point padding.
      voxels_pad_or_clip_size: Number of target voxels to pad or clip to. If
        None, it will not perform the voxel padding.
      voxel_grid_cell_size: A three dimensional tuple determining the voxel
        grid size.
      num_offset_bins_x: Number of bins for point offsets in x direction.
      num_offset_bins_y: Number of bins for point offsets in y direction.
      num_offset_bins_z: Number of bins for point offsets in z direction.
      point_feature_keys: The keys used to form the voxel features.
      point_to_voxel_segment_func: The function used to aggregate the features
        of the points that fall in the same voxel.
      x_random_crop_size: Size of the random crop in x dimension. If None,
        random crop will not take place on x dimension.
      y_random_crop_size: Size of the random crop in y dimension. If None,
        random crop will not take place on y dimension.
      min_scale_ratio: Minimum scale ratio. Used for scaling point cloud.
      max_scale_ratio: Maximum scale ratio. Used for scaling point cloud.
      semantic_labels_offset: An integer offset that will be added to labels.
      ignore_labels: A tuple containing labels that should be ignored when
        computing the loss and metrics.
      remove_unlabeled_images_and_points: If True, removes the images that are
        not labeled and also removes the points that are associated with
        those images.
      labeled_view_name: The name of the view that is labeled, otherwise None.
      only_keep_first_return_lidar_points: If True, we only keep the first
        return lidar points.

    Returns:
      A dictionary with the processed point tensors, the entries of `inputs`
      that were not treated as point or view tensors, the per-view
      '<view>/features' and '<view>/indices_2d' entries and whatever the
      voxelization helpers add. If `output_keys` is not None, only the keys
      listed there are kept.

    Raises:
      ValueError: if a view image returned by
        `images_points_correspondence_fn` does not have rank 4.
      ValueError: if a view indices_2d tensor or `points_in_image_frame` does
        not have rank 3.
      ValueError: if `num_frame_to_load` > 1 and
        `images_points_correspondence_fn` is None.
      ValueError: if `inputs` ends up without point positions, or the point
        positions are not of rank 2 with a last dimension of 3.
      ValueError: if `num_frame_to_load` > 1 and
        `num_points_to_randomly_sample` is not None.
    """
    inputs = dict(inputs)

    # NOTE(review): `all_frame_inputs`, `scene`, `frame1` and
    # `frame_start_index` are only bound inside this branch but are read
    # later whenever `num_frame_to_load > 1` or `compute_motion_labels_fn` is
    # set -- those options look like they require
    # `using_sequence_dataset=True`; confirm with callers.
    if using_sequence_dataset:
        all_frame_inputs = inputs
        scene = all_frame_inputs['scene']
        frame1 = all_frame_inputs['frame1']
        frame_start_index = all_frame_inputs['frame_start_index']
        inputs = dict(
            all_frame_inputs['frame0']
        )  # so that the following processing code can be unchanged.

    # Initializing empty dictionary for mesh, image, indices_2d and non tensor
    # inputs.
    non_tensor_inputs = {}
    view_image_inputs = {}
    view_indices_2d_inputs = {}
    mesh_inputs = {}

    if image_preprocess_fn_dic is None:
        image_preprocess_fn_dic = {}

    # Convert all float64 to float32 and all int64 to int32.
    for key in sorted(inputs):
        if isinstance(inputs[key], tf.Tensor):
            if inputs[key].dtype == tf.float64:
                inputs[key] = tf.cast(inputs[key], dtype=tf.float32)
            if inputs[key].dtype == tf.int64:
                inputs[key] = tf.cast(inputs[key], dtype=tf.int32)

    # Alias the caller-named tensors under the standard field names; the
    # original entries are left in `inputs` as well.
    if points_key in inputs:
        inputs[standard_fields.InputDataFields.
               point_positions] = inputs[points_key]
    if colors_key is not None and colors_key in inputs:
        inputs[
            standard_fields.InputDataFields.point_colors] = inputs[colors_key]
    if normals_key is not None and normals_key in inputs:
        inputs[standard_fields.InputDataFields.
               point_normals] = inputs[normals_key]
    if intensities_key is not None and intensities_key in inputs:
        inputs[standard_fields.InputDataFields.
               point_intensities] = inputs[intensities_key]
    if elongations_key is not None and elongations_key in inputs:
        inputs[standard_fields.InputDataFields.
               point_elongations] = inputs[elongations_key]
    if semantic_labels_key is not None and semantic_labels_key in inputs:
        inputs[standard_fields.InputDataFields.
               object_class_points] = inputs[semantic_labels_key]
    if motion_labels_key is not None and motion_labels_key in inputs:
        inputs[standard_fields.InputDataFields.
               object_flow_points] = inputs[motion_labels_key]
    if spin_coords_key is not None and spin_coords_key in inputs:
        inputs[standard_fields.InputDataFields.
               point_spin_coordinates] = inputs[spin_coords_key]

    # Acquire point / image correspondences.
    if images_points_correspondence_fn is not None:
        # Values returned by the correspondence function override the ones
        # aliased above.
        fn_outputs = images_points_correspondence_fn(inputs)
        if 'points_position' in fn_outputs:
            inputs[standard_fields.InputDataFields.
                   point_positions] = fn_outputs['points_position']
        if 'points_intensity' in fn_outputs and intensities_key is not None:
            inputs[standard_fields.InputDataFields.
                   point_intensities] = fn_outputs['points_intensity']
        if 'points_elongation' in fn_outputs and elongations_key is not None:
            inputs[standard_fields.InputDataFields.
                   point_elongations] = fn_outputs['points_elongation']
        if 'points_label' in fn_outputs and semantic_labels_key is not None:
            inputs[standard_fields.InputDataFields.
                   object_class_points] = fn_outputs['points_label']
        if 'view_images' in fn_outputs:
            for key in sorted(fn_outputs['view_images']):
                if len(fn_outputs['view_images'][key].shape) != 4:
                    raise ValueError(('%s image should have rank 4.' % key))
            view_image_inputs = fn_outputs['view_images']
        if 'view_indices_2d' in fn_outputs:
            for key in sorted(fn_outputs['view_indices_2d']):
                if len(fn_outputs['view_indices_2d'][key].shape) != 3:
                    raise ValueError(
                        ('%s indices_2d should have rank 3.' % key))
            view_indices_2d_inputs = fn_outputs['view_indices_2d']
    else:
        # Without a correspondence function a single view named 'rgb_view' is
        # built from inputs['image'] and the points-in-image-frame tensor.
        if points_in_image_frame_key is not None:
            inputs['rgb_view/features'] = inputs['image']
            inputs['rgb_view/indices_2d'] = inputs[points_in_image_frame_key]
            if len(inputs['rgb_view/indices_2d'].shape) != 3:
                raise ValueError('`points_in_image_frame` should have rank 3.')

    # Snapshot of the single-frame inputs, taken before points of additional
    # frames are concatenated into `inputs`.
    frame0 = inputs.copy()
    if num_frame_to_load > 1:
        point_positions_list = [
            frame0[standard_fields.InputDataFields.point_positions]
        ]
        # Only the first view (view_names[0]) is merged across frames.
        if view_indices_2d_inputs:
            view_indices_2d_list = [view_indices_2d_inputs[view_names[0]]]
        # Per-point frame id: zeros for frame0, `i` for additional frame i.
        frame_source_list = [
            tf.zeros([
                tf.shape(
                    frame0[standard_fields.InputDataFields.point_positions])[0]
            ], tf.int32)
        ]
        for i in range(1, num_frame_to_load):
            target_frame_key = 'frame' + str(i)
            if images_points_correspondence_fn is not None:
                frame_i = images_points_correspondence_fn(
                    all_frame_inputs[target_frame_key])
            else:
                raise ValueError(
                    'images_points_correspondence_fn is needed for loading multi-frame pointclouds.'
                )
            transformed_point_positions = transform_points_fn(
                scene, frame_i['points_position'], frame_start_index,
                i + frame_start_index)
            point_positions_list.append(transformed_point_positions)
            if view_indices_2d_inputs:
                view_indices_2d_list.append(
                    frame_i['view_indices_2d'][view_names[0]])
            frame_source_list.append(
                tf.ones([tf.shape(transformed_point_positions)[0]], tf.int32)
                * i)

        # add multi-frame info to override inputs and view_indices_2d_inputs
        inputs[standard_fields.InputDataFields.
               point_frame_index] = tf.expand_dims(
                   tf.concat(frame_source_list, axis=0), axis=1)
        inputs[standard_fields.InputDataFields.point_positions] = tf.concat(
            point_positions_list, axis=0)
        if view_indices_2d_inputs:
            view_indices_2d_inputs[view_names[0]] = tf.concat(
                view_indices_2d_list, axis=1)

    # Validate inputs.
    if standard_fields.InputDataFields.point_positions not in inputs:
        raise ValueError('`inputs` must contain a point_positions')
    if inputs[
            standard_fields.InputDataFields.point_positions].shape.ndims != 2:
        raise ValueError('points must be of rank 2.')
    if inputs[standard_fields.InputDataFields.point_positions].shape[1] != 3:
        raise ValueError('point should be 3 dimensional.')

    # Remove normal nans.
    if standard_fields.InputDataFields.point_normals in inputs:
        inputs[standard_fields.InputDataFields.point_normals] = tf.where(
            tf.math.is_nan(
                inputs[standard_fields.InputDataFields.point_normals]),
            tf.zeros_like(
                inputs[standard_fields.InputDataFields.point_normals]),
            inputs[standard_fields.InputDataFields.point_normals])

    # Compute semantic labels if compute_semantic_labels_fn is not None
    # An example is when the ground-truth contains 3d object boxes and not per
    # point labels. This would be a function that infers point labels from
    # boxes.
    # Note that the label functions receive `frame0`, i.e. the single-frame
    # snapshot, not the merged multi-frame `inputs`.
    if compute_semantic_labels_fn is not None:
        inputs[standard_fields.InputDataFields.
               object_class_points] = compute_semantic_labels_fn(
                   inputs=frame0,
                   points_key=standard_fields.InputDataFields.point_positions)
    if compute_motion_labels_fn is not None:
        inputs[standard_fields.InputDataFields.
               object_flow_points] = compute_motion_labels_fn(
                   scene=scene,
                   frame0=frame0,
                   frame1=frame1,
                   frame_start_index=frame_start_index,
                   points_key=standard_fields.InputDataFields.point_positions)

    # Splitting inputs to {view_image_inputs,
    #                      view_indices_2d_inputs,
    #                      mesh_inputs,
    #                      non_tensor_inputs}
    mesh_keys = []
    for key in [
            standard_fields.InputDataFields.point_positions,
            standard_fields.InputDataFields.point_colors,
            standard_fields.InputDataFields.point_normals,
            standard_fields.InputDataFields.point_intensities,
            standard_fields.InputDataFields.point_elongations,
            standard_fields.InputDataFields.object_class_points,
            standard_fields.InputDataFields.point_spin_coordinates,
            standard_fields.InputDataFields.object_flow_points,
            standard_fields.InputDataFields.point_frame_index,
    ]:
        if key is not None and key in inputs:
            mesh_keys.append(key)
    view_image_names = [('%s/features' % key) for key in view_names]
    view_indices_2d_names = [('%s/indices_2d' % key) for key in view_names]

    # Additional key collecting
    for k, v in six.iteritems(inputs):
        if k in view_image_names:
            view_image_inputs[k] = v
        elif k in view_indices_2d_names:
            view_indices_2d_inputs[k] = v
        elif k in mesh_keys:
            if num_frame_to_load > 1:
                # Point tensors that only cover frame0 are padded with extra
                # rows up to the merged point count; padded class labels get
                # -1, everything else 0. The padding spec assumes `v` is of
                # rank 2 -- TODO confirm for every mesh key.
                pad_size = tf.shape(
                    inputs[standard_fields.InputDataFields.
                           point_positions])[0] - tf.shape(v)[0]
                if k == standard_fields.InputDataFields.object_class_points:
                    pad_value = -1
                else:
                    pad_value = 0
                v = tf.pad(v, [[0, pad_size], [0, 0]],
                           constant_values=pad_value)
            mesh_inputs[k] = v
        else:
            non_tensor_inputs[k] = v

    # Remove points that are not in the lidar first return (optional)
    if only_keep_first_return_lidar_points:
        _remove_second_return_lidar_points(
            mesh_inputs=mesh_inputs,
            view_indices_2d_inputs=view_indices_2d_inputs)

    # Randomly sample points
    preprocessor_utils.randomly_sample_points(
        mesh_inputs=mesh_inputs,
        view_indices_2d_inputs=view_indices_2d_inputs,
        target_num_points=num_points_to_randomly_sample)

    # Add weights if it does not exist in inputs. The weight of the points
    # with label in `ignore_labels` is set to 0. This helps the loss and
    # metrics to ignore those labels.
    # NOTE(review): `use_weights` is also true when only flow labels are
    # present, yet both branches below index `object_class_points` -- verify
    # that flow labels never occur without class labels.
    use_weights = (
        standard_fields.InputDataFields.object_class_points in mesh_inputs
        or standard_fields.InputDataFields.object_flow_points in mesh_inputs)
    if use_weights:
        if num_frame_to_load > 1:
            # Only frame0 points get weight 1; points appended from the other
            # frames get weight 0.
            num_valid_points_frame0 = tf.shape(
                frame0[standard_fields.InputDataFields.point_positions])[0]
            num_additional_frame_points = tf.shape(
                mesh_inputs[standard_fields.InputDataFields.
                            object_class_points])[0] - num_valid_points_frame0
            weights = tf.concat([
                tf.ones([num_valid_points_frame0, 1], tf.float32),
                tf.zeros([num_additional_frame_points, 1], tf.float32)
            ],
                                axis=0)
        else:
            weights = tf.ones_like(mesh_inputs[
                standard_fields.InputDataFields.object_class_points],
                                   dtype=tf.float32)

    if standard_fields.InputDataFields.object_class_points in mesh_inputs:
        mesh_inputs[
            standard_fields.InputDataFields.object_class_points] = tf.cast(
                mesh_inputs[
                    standard_fields.InputDataFields.object_class_points],
                dtype=tf.int32)
        # Labels are compared against `ignore_labels` before the offset below
        # is applied.
        for ignore_label in ignore_labels:
            weights *= tf.cast(tf.not_equal(
                mesh_inputs[
                    standard_fields.InputDataFields.object_class_points],
                ignore_label),
                               dtype=tf.float32)
        mesh_inputs[
            standard_fields.InputDataFields.point_loss_weights] = weights
        mesh_inputs[standard_fields.InputDataFields.
                    object_class_points] += semantic_labels_offset

    # We normalize the intensities and elongations to be in a smaller range.
    if standard_fields.InputDataFields.point_intensities in mesh_inputs:
        mesh_inputs[standard_fields.InputDataFields.
                    point_intensities] = change_intensity_range(
                        intensities=mesh_inputs[
                            standard_fields.InputDataFields.point_intensities])
    if standard_fields.InputDataFields.point_elongations in mesh_inputs:
        # Linear map x -> x * 2 / 255 - 1.
        mesh_inputs[
            standard_fields.InputDataFields.point_elongations] = (tf.cast(
                mesh_inputs[standard_fields.InputDataFields.point_elongations],
                dtype=tf.float32) * 2.0 / 255.0) - 1.0

    # Random scale the points.
    if min_scale_ratio is not None and max_scale_ratio is not None:
        # A single scalar ratio is drawn and applied to positions and, when
        # present, to the flow vectors.
        scale_ratio = tf.random.uniform([],
                                        minval=min_scale_ratio,
                                        maxval=max_scale_ratio,
                                        dtype=tf.float32)
        mesh_inputs[
            standard_fields.InputDataFields.point_positions] *= scale_ratio
        if standard_fields.InputDataFields.object_flow_points in mesh_inputs:
            mesh_inputs[standard_fields.InputDataFields.
                        object_flow_points] *= scale_ratio

    # Random crop the points.
    randomly_crop_points(mesh_inputs=mesh_inputs,
                         view_indices_2d_inputs=view_indices_2d_inputs,
                         x_random_crop_size=x_random_crop_size,
                         y_random_crop_size=y_random_crop_size)

    # If training, pick the best labeled image and points that project to it.
    # In many datasets, only one image is labeled anyways.
    if remove_unlabeled_images_and_points:
        pick_labeled_image(mesh_inputs=mesh_inputs,
                           view_image_inputs=view_image_inputs,
                           view_indices_2d_inputs=view_indices_2d_inputs,
                           view_name=labeled_view_name)

    # Process images.
    preprocessor_utils.preprocess_images(
        view_image_inputs=view_image_inputs,
        view_indices_2d_inputs=view_indices_2d_inputs,
        image_preprocess_fn_dic=image_preprocess_fn_dic,
        is_training=is_training)

    # Record the original points.
    # These are the values after scaling / cropping but before mean
    # subtraction, rotation, flipping and color normalization.
    original_points = mesh_inputs[
        standard_fields.InputDataFields.point_positions]
    if standard_fields.InputDataFields.point_colors in mesh_inputs:
        original_colors = mesh_inputs[
            standard_fields.InputDataFields.point_colors]
    if standard_fields.InputDataFields.point_normals in mesh_inputs:
        original_normals = mesh_inputs[
            standard_fields.InputDataFields.point_normals]

    # Update feature visibility count.
    if 'feature_visibility_count' in mesh_inputs:
        # Clamp the count to at least 1 so the division below is safe.
        mesh_inputs['feature_visibility_count'] = tf.maximum(
            mesh_inputs['feature_visibility_count'], 1)
        mesh_inputs['features'] /= tf.cast(
            mesh_inputs['feature_visibility_count'], dtype=tf.float32)

    # Subtract mean from points.
    mean_points = tf.reduce_mean(
        mesh_inputs[standard_fields.InputDataFields.point_positions], axis=0)
    mesh_inputs[
        standard_fields.InputDataFields.point_positions] -= tf.expand_dims(
            mean_points, axis=0)

    # Rotate points randomly.
    if standard_fields.InputDataFields.point_normals in mesh_inputs:
        normals = mesh_inputs[standard_fields.InputDataFields.point_normals]
    else:
        normals = None

    if standard_fields.InputDataFields.object_flow_points in mesh_inputs:
        motions = mesh_inputs[
            standard_fields.InputDataFields.object_flow_points]
    else:
        motions = None

    (mesh_inputs[standard_fields.InputDataFields.point_positions],
     rotated_normals, rotated_motions) = rotate_randomly(
         points=mesh_inputs[standard_fields.InputDataFields.point_positions],
         normals=normals,
         motions=motions,
         x_min_degree_rotation=x_min_degree_rotation,
         x_max_degree_rotation=x_max_degree_rotation,
         y_min_degree_rotation=y_min_degree_rotation,
         y_max_degree_rotation=y_max_degree_rotation,
         z_min_degree_rotation=z_min_degree_rotation,
         z_max_degree_rotation=z_max_degree_rotation)

    # Random flipping in x and y directions.
    (mesh_inputs[standard_fields.InputDataFields.point_positions],
     flipped_normals,
     flipped_motions) = flip_randomly_points_and_normals_motions(
         points=mesh_inputs[standard_fields.InputDataFields.point_positions],
         normals=rotated_normals,
         motions=rotated_motions,
         is_training=is_training)
    if standard_fields.InputDataFields.point_normals in mesh_inputs:
        mesh_inputs[
            standard_fields.InputDataFields.point_normals] = flipped_normals
    if standard_fields.InputDataFields.object_flow_points in mesh_inputs:
        mesh_inputs[standard_fields.InputDataFields.
                    object_flow_points] = flipped_motions

    # Normalize RGB to [-1.0, 1.0].
    # (This range holds for color values in [0, 255].)
    if standard_fields.InputDataFields.point_colors in mesh_inputs:
        mesh_inputs[standard_fields.InputDataFields.point_colors] = tf.cast(
            mesh_inputs[standard_fields.InputDataFields.point_colors],
            dtype=tf.float32)
        mesh_inputs[standard_fields.InputDataFields.point_colors] *= (2.0 /
                                                                      255.0)
        mesh_inputs[standard_fields.InputDataFields.point_colors] -= 1.0

    # Add original points to mesh inputs.
    mesh_inputs[standard_fields.InputDataFields.
                point_positions_original] = original_points
    if standard_fields.InputDataFields.point_colors in mesh_inputs:
        mesh_inputs[standard_fields.InputDataFields.
                    point_colors_original] = original_colors
    if standard_fields.InputDataFields.point_normals in mesh_inputs:
        mesh_inputs[standard_fields.InputDataFields.
                    point_normals_original] = original_normals

    # Pad or clip the point tensors.
    pad_or_clip(mesh_inputs=mesh_inputs,
                view_indices_2d_inputs=view_indices_2d_inputs,
                pad_or_clip_size=points_pad_or_clip_size)
    if num_frame_to_load > 1:
        # Note: num_valid_points is the sum of 'num_points_per_frame' for now.
        # num_points_per_frame is each frame's valid num of points.
        # TODO(huangrui): if random sampling is called earlier, the count here
        # is not guaranteed to be in order. need sorting.
        if num_points_to_randomly_sample is not None:
            raise ValueError(
                'randomly sample is not compatible with padding multi frame point clouds yet!'
            )
        # Count how many points carry each frame index.
        _, _, mesh_inputs[standard_fields.InputDataFields.
                          num_valid_points_per_frame] = tf.unique_with_counts(
                              tf.reshape(
                                  mesh_inputs[standard_fields.InputDataFields.
                                              point_frame_index], [-1]))
        if points_pad_or_clip_size is not None:
            # Number of rows that padding added (0 when nothing was padded).
            padded_points = tf.where_v2(
                tf.greater(
                    points_pad_or_clip_size, mesh_inputs[
                        standard_fields.InputDataFields.num_valid_points]),
                points_pad_or_clip_size -
                mesh_inputs[standard_fields.InputDataFields.num_valid_points],
                0)

            # Correct the potential unique count error from optionally padded
            # 0s point frame index.
            # The padded count is subtracted from the first entry only.
            mesh_inputs[
                standard_fields.InputDataFields.
                num_valid_points_per_frame] -= tf.pad(
                    tf.expand_dims(padded_points, 0), [[
                        0,
                        tf.shape(mesh_inputs[standard_fields.InputDataFields.
                                             num_valid_points_per_frame])[0] -
                        1
                    ]])

    # Putting back the dictionaries together
    processed_inputs = mesh_inputs.copy()
    processed_inputs.update(non_tensor_inputs)
    for key in sorted(view_image_inputs):
        processed_inputs[('%s/features' % key)] = view_image_inputs[key]
    for key in sorted(view_indices_2d_inputs):
        processed_inputs[('%s/indices_2d' % key)] = view_indices_2d_inputs[key]

    # Create features that do not exist
    if 'point_offsets' in point_feature_keys:
        preprocessor_utils.add_point_offsets(
            inputs=processed_inputs, voxel_grid_cell_size=voxel_grid_cell_size)
    if 'point_offset_bins' in point_feature_keys:
        preprocessor_utils.add_point_offset_bins(
            inputs=processed_inputs,
            voxel_grid_cell_size=voxel_grid_cell_size,
            num_bins_x=num_offset_bins_x,
            num_bins_y=num_offset_bins_y,
            num_bins_z=num_offset_bins_z)

    # Voxelize point features
    preprocessor_utils.voxelize_point_features(
        inputs=processed_inputs,
        voxels_pad_or_clip_size=voxels_pad_or_clip_size,
        voxel_grid_cell_size=voxel_grid_cell_size,
        point_feature_keys=point_feature_keys,
        point_to_voxel_segment_func=point_to_voxel_segment_func,
        num_frame_to_load=num_frame_to_load)

    # Voxelize point / image correspondence indices
    preprocessor_utils.voxelize_point_to_view_correspondences(
        inputs=processed_inputs,
        view_indices_2d_inputs=view_indices_2d_inputs,
        voxels_pad_or_clip_size=voxels_pad_or_clip_size,
        voxel_grid_cell_size=voxel_grid_cell_size)

    # Voxelizing the semantic labels
    preprocessor_utils.voxelize_semantic_labels(
        inputs=processed_inputs,
        voxels_pad_or_clip_size=voxels_pad_or_clip_size,
        voxel_grid_cell_size=voxel_grid_cell_size)

    # Voxelizing the loss weights
    preprocessor_utils.voxelize_property_tensor(
        inputs=processed_inputs,
        point_tensor_key=standard_fields.InputDataFields.point_loss_weights,
        corresponding_voxel_tensor_key=standard_fields.InputDataFields.
        voxel_loss_weights,
        voxels_pad_or_clip_size=voxels_pad_or_clip_size,
        voxel_grid_cell_size=voxel_grid_cell_size,
        segment_func=tf.math.unsorted_segment_max)

    # Voxelizing the object flow
    if standard_fields.InputDataFields.object_flow_points in processed_inputs:
        # The flow is voxelized twice, once with a max and once with a min
        # segment function, and the two results are summed.
        preprocessor_utils.voxelize_property_tensor(
            inputs=processed_inputs,
            point_tensor_key=standard_fields.InputDataFields.
            object_flow_points,
            corresponding_voxel_tensor_key='object_flow_voxels_max',
            voxels_pad_or_clip_size=voxels_pad_or_clip_size,
            voxel_grid_cell_size=voxel_grid_cell_size,
            segment_func=tf.math.unsorted_segment_max)
        preprocessor_utils.voxelize_property_tensor(
            inputs=processed_inputs,
            point_tensor_key=standard_fields.InputDataFields.
            object_flow_points,
            corresponding_voxel_tensor_key='object_flow_voxels_min',
            voxels_pad_or_clip_size=voxels_pad_or_clip_size,
            voxel_grid_cell_size=voxel_grid_cell_size,
            segment_func=tf.math.unsorted_segment_min)
        processed_inputs[standard_fields.InputDataFields.
                         object_flow_voxels] = processed_inputs[
                             'object_flow_voxels_max'] + processed_inputs[
                                 'object_flow_voxels_min']

    # NOTE(review): this writes to `mesh_inputs`, but `processed_inputs` was
    # copied from it earlier and is what gets returned, so the assignment does
    # not reach the output as written -- confirm whether `processed_inputs`
    # was intended.
    if num_frame_to_load > 1:
        mesh_inputs[
            standard_fields.InputDataFields.num_valid_points] = mesh_inputs[
                standard_fields.InputDataFields.num_valid_points_per_frame][0]

    # Filter preprocessed_inputs by output_keys if it is not None.
    if output_keys is not None:
        processed_inputs = {
            k: v
            for k, v in six.iteritems(processed_inputs)
            if k in output_keys
        }
    return processed_inputs