def fn():
  """Loss function for when number of input and output boxes is positive."""
  if is_balanced:
    weights = loss_utils.get_balanced_loss_weights_multiclass(
        labels=input_boxes_instance_id)
  else:
    weights = tf.ones([tf.shape(input_boxes_instance_id)[0], 1],
                      dtype=tf.float32)
  normalized_box_size = 5.0
  predicted_boxes_length = output_boxes_length
  predicted_boxes_height = output_boxes_height
  predicted_boxes_width = output_boxes_width
  predicted_boxes_center = output_boxes_center
  predicted_boxes_rotation_matrix = output_boxes_rotation_matrix
  gt_boxes_length = input_boxes_length
  gt_boxes_height = input_boxes_height
  gt_boxes_width = input_boxes_width
  gt_boxes_center = input_boxes_center
  gt_boxes_rotation_matrix = input_boxes_rotation_matrix
  if loss_type in ['normalized_huber', 'normalized_euclidean']:
    predicted_boxes_length /= (gt_boxes_length / normalized_box_size)
    predicted_boxes_height /= (gt_boxes_height / normalized_box_size)
    predicted_boxes_width /= (gt_boxes_width / normalized_box_size)
    gt_boxes_length = tf.ones_like(
        gt_boxes_length, dtype=tf.float32) * normalized_box_size
    gt_boxes_height = tf.ones_like(
        gt_boxes_height, dtype=tf.float32) * normalized_box_size
    gt_boxes_width = tf.ones_like(
        gt_boxes_width, dtype=tf.float32) * normalized_box_size
  gt_box_corners = box_utils.get_box_corners_3d(
      boxes_length=gt_boxes_length,
      boxes_height=gt_boxes_height,
      boxes_width=gt_boxes_width,
      boxes_rotation_matrix=gt_boxes_rotation_matrix,
      boxes_center=gt_boxes_center)
  predicted_box_corners = box_utils.get_box_corners_3d(
      boxes_length=predicted_boxes_length,
      boxes_height=predicted_boxes_height,
      boxes_width=predicted_boxes_width,
      boxes_rotation_matrix=predicted_boxes_rotation_matrix,
      boxes_center=predicted_boxes_center)
  corner_weights = tf.tile(weights, [1, 8])
  if loss_type in ['huber', 'normalized_huber']:
    loss_fn = tf.keras.losses.Huber(
        delta=delta, reduction=tf.keras.losses.Reduction.NONE)
  elif loss_type in ['normalized_absolute_difference', 'absolute_difference']:
    loss_fn = tf.keras.losses.MeanAbsoluteError(
        reduction=tf.keras.losses.Reduction.NONE)
  else:
    raise ValueError('Unknown loss type %s.' % loss_type)
  box_corner_losses = loss_fn(
      y_true=tf.reshape(gt_box_corners, [-1, 3]),
      y_pred=tf.reshape(predicted_box_corners, [-1, 3]))
  return tf.reduce_mean(box_corner_losses * tf.reshape(corner_weights, [-1]))
def _observation_cost(obs):
  c_theta, s_theta, d_theta = obs[:, :1], obs[:, 1:2], obs[:, 2:3]
  theta = tf.math.atan2(s_theta, c_theta)
  cost = tf.reduce_sum(tf.square(theta) + 0.1 * tf.square(d_theta), axis=1)
  cost = tf.where(tf.math.is_nan(cost), 1e6 * tf.ones_like(cost), cost)
  return cost
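# Hypothetical usage sketch (not from the original source). The cost above
# matches the classic pendulum swing-up objective with observations assumed
# to be [cos(theta), sin(theta), d_theta]; recovering theta via
# atan2(sin, cos) keeps the angle in [-pi, pi], so cost is minimized at the
# upright position (theta = 0).
import tensorflow as tf

obs = tf.constant([[1.0, 0.0, 0.0],    # upright, at rest -> cost ~0
                   [-1.0, 0.0, 1.0]])  # hanging down, spinning -> high cost
print(_observation_cost(obs))  # approximately [0.0, pi**2 + 0.1]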
def fit_gaussian(embeddings, damping=1e-7, full_covariance=False):
  """Fits a unimodal Gaussian distribution to `embeddings`.

  Args:
    embeddings: A [batch_size, embedding_dim] tf.Tensor of embeddings.
    damping: The scale of the covariance damping coefficient.
    full_covariance: Whether to use a full or diagonal covariance.

  Returns:
    Parameter estimates (means and log variances) for a Gaussian model.
  """
  if full_covariance:
    num, dim = tf.split(tf.shape(input=embeddings), num_or_size_splits=2)
    num, dim = tf.squeeze(num), tf.squeeze(dim)
    sample_mean = tf.reduce_mean(input_tensor=embeddings, axis=0)
    centered_embeddings = embeddings - sample_mean
    sample_covariance = tf.einsum('ij,ik->kj', centered_embeddings,
                                  centered_embeddings)  # Outer product.
    sample_covariance += damping * tf.eye(dim)  # Positive definiteness.
    sample_covariance /= tf.cast(num, dtype=tf.float32)  # Scale by N.
    return sample_mean, sample_covariance
  else:
    # `axes=[0]` computes per-dimension moments over the batch (the original
    # call omitted the required `axes` argument).
    sample_mean, sample_variances = tf.nn.moments(x=embeddings, axes=[0])
    log_variances = tf.math.log(
        sample_variances + damping * tf.ones_like(sample_variances))
    return sample_mean, log_variances
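# Hypothetical usage sketch (not from the original source): fitting a
# diagonal Gaussian to a batch of embeddings and sampling from it, assuming
# tensorflow_probability is available.
import tensorflow as tf
import tensorflow_probability as tfp

embeddings = tf.random.normal([32, 8])
mean, log_variances = fit_gaussian(embeddings)  # diagonal covariance
dist = tfp.distributions.MultivariateNormalDiag(
    loc=mean, scale_diag=tf.exp(0.5 * log_variances))
samples = dist.sample(4)  # [4, 8]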
def fn():
  """Loss function for when number of input and output boxes is positive."""
  if is_balanced:
    weights = loss_utils.get_balanced_loss_weights_multiclass(
        labels=input_boxes_instance_id)
  else:
    weights = tf.ones([tf.shape(input_boxes_instance_id)[0], 1],
                      dtype=tf.float32)
  gt_length = tf.reshape(input_boxes_length, [-1, 1])
  gt_height = tf.reshape(input_boxes_height, [-1, 1])
  gt_width = tf.reshape(input_boxes_width, [-1, 1])
  predicted_length = tf.reshape(output_boxes_length, [-1, 1])
  predicted_height = tf.reshape(output_boxes_height, [-1, 1])
  predicted_width = tf.reshape(output_boxes_width, [-1, 1])
  predicted_length /= gt_length
  predicted_height /= gt_height
  predicted_width /= gt_width
  predicted_size = tf.concat(
      [predicted_length, predicted_height, predicted_width], axis=1)
  gt_size = tf.ones_like(predicted_size)
  if loss_type == 'huber':
    loss_fn = tf.keras.losses.Huber(
        delta=delta, reduction=tf.keras.losses.Reduction.NONE)
  elif loss_type == 'absolute_difference':
    loss_fn = tf.keras.losses.MeanAbsoluteError(
        reduction=tf.keras.losses.Reduction.NONE)
  else:
    raise ValueError('Unknown loss type %s.' % loss_type)
  size_losses = loss_fn(y_true=gt_size, y_pred=predicted_size)
  return tf.reduce_mean(size_losses * tf.reshape(weights, [-1]))
def ctrl_rewards(states,
                 actions,
                 rewards,
                 next_states,
                 contexts,
                 reward_scales=1.0):
  """Returns the negative control cost.

  Args:
    states: A [batch_size, num_state_dims] Tensor representing a batch of
      states.
    actions: A [batch_size, num_action_dims] Tensor representing a batch of
      actions.
    rewards: A [batch_size] Tensor representing a batch of rewards.
    next_states: A [batch_size, num_state_dims] Tensor representing a batch
      of next states.
    contexts: A list of [batch_size, num_context_dims] Tensor representing
      a batch of contexts.
    reward_scales: multiplicative scale for rewards. A scalar or 1D tensor,
      must be broadcastable to number of reward dimensions.

  Returns:
    A new tf.float32 [batch_size] rewards Tensor, and tf.float32
      [batch_size] discounts tensor.
  """
  del states, rewards, contexts  # Unused
  if actions is None:
    rewards = tf.to_float(tf.zeros(shape=next_states.shape[:1]))
  else:
    rewards = -tf.reduce_sum(tf.square(actions), axis=1)
    rewards *= reward_scales
    rewards = tf.to_float(rewards)
  return rewards, tf.ones_like(rewards)
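# Hypothetical usage sketch (not from the original source), assuming a
# TF1-style runtime where tf.to_float exists. These reward functions share a
# (states, actions, rewards, next_states, contexts) signature, so they can
# be swapped into a common reward-relabeling pipeline; values are made up.
import tensorflow as tf

actions = tf.constant([[0.5, -0.5], [0.0, 0.0]])
next_states = tf.zeros([2, 3])
new_rewards, discounts = ctrl_rewards(
    states=None, actions=actions, rewards=None,
    next_states=next_states, contexts=None)
# new_rewards ~= [-0.5, 0.0]; discounts == [1.0, 1.0]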
def __init__(self, observations, env_spec):
  with tf.name_scope('fully_conv_model'):
    spatial_streams = {
        name: spatial_stream(observations[name], spec)
        for name, spec in env_spec.observation_spec.items()
        if spec.is_spatial
    }
    fc = Concatenate()([Flatten()(x) for x in spatial_streams.values()])
    fc = Dense(
        256,
        activation='relu',
        name='fc',
        kernel_initializer=tf.keras.initializers.Orthogonal())(fc)

    with tf.name_scope('policy'):
      self.policy = {}
      for name, spec in env_spec.action_spec.items():
        with tf.name_scope(name):
          if spec.obs_space:
            logits = Conv2D(
                1,
                1,
                activation='linear',
                data_format='channels_first',
                kernel_initializer=tf.keras.initializers.Orthogonal(
                    gain=0.1))(spatial_streams[spec.obs_space])
            logits = Flatten()(logits)
          else:
            logits = Dense(
                np.prod(spec.sizes),
                activation='linear',
                kernel_initializer=tf.keras.initializers.Orthogonal(
                    gain=0.1))(fc)
          if name == 'function_id':
            logits = tf.where(
                observations['available_actions'] > 0,
                logits,
                -1000 * tf.ones_like(logits),
                name='mask_unavailable_functions')
          self.policy[name] = tfp.distributions.Categorical(logits=logits)

    with tf.name_scope('actions'):
      self.actions = {
          name: dist.sample(name=name + '_sample')
          for name, dist in self.policy.items()
      }

    with tf.name_scope('value'):
      self.value = value_output(fc)
def state_rewards(states,
                  actions,
                  rewards,
                  next_states,
                  contexts,
                  weight_index=None,
                  state_indices=None,
                  weight_vector=1.0,
                  offset_vector=0.0,
                  summarize=False):
  """Returns rewards that are a linear mapping of next_states.

  Args:
    states: A [batch_size, num_state_dims] Tensor representing a batch of
      states.
    actions: A [batch_size, num_action_dims] Tensor representing a batch of
      actions.
    rewards: A [batch_size] Tensor representing a batch of rewards.
    next_states: A [batch_size, num_state_dims] Tensor representing a batch
      of next states.
    contexts: A list of [batch_size, num_context_dims] Tensor representing
      a batch of contexts.
    weight_index: (integer) Index of contexts lists that specify weighting.
    state_indices: (a list of Numpy integer array) Indices of states
      dimensions to be mapped.
    weight_vector: (a number or a list or Numpy array) The weighting vector,
      broadcastable to `next_states`.
    offset_vector: (a number or a list of Numpy array) The offset vector.
    summarize: (boolean) enable summary ops.

  Returns:
    A new tf.float32 [batch_size] rewards Tensor, and tf.float32
      [batch_size] discounts tensor.
  """
  del states, actions, rewards  # unused args
  stats = {}
  record_tensor(next_states, state_indices, stats)
  next_states = index_states(next_states, state_indices)
  weight = tf.constant(
      weight_vector, dtype=next_states.dtype, shape=next_states[0].shape)
  weights = tf.expand_dims(weight, 0)
  offset = tf.constant(
      offset_vector, dtype=next_states.dtype, shape=next_states[0].shape)
  offsets = tf.expand_dims(offset, 0)
  if weight_index is not None:
    weights *= contexts[weight_index]
  rewards = tf.to_float(
      tf.reduce_sum(weights * (next_states + offsets), axis=1))
  if summarize:
    with tf.name_scope('RewardFn/'):
      summarize_stats(stats)
  return rewards, tf.ones_like(rewards)
def _loss_op(self):
  with tf.name_scope("loss_op"):
    # Discriminator loss on real examples: push real logits toward 1.
    # One-sided label smoothing is left as a commented-out alternative:
    # labels = tf.distributions.Uniform(low=0.7, high=1.2).sample(
    #     tf.shape(self._true_d))
    labels = tf.ones_like(self._true_d)
    d_loss_true = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            logits=self._true_d, labels=labels))
    # Discriminator loss on generated examples: push fake logits toward 0.
    # labels = tf.distributions.Uniform(low=0., high=0.3).sample(
    #     tf.shape(self._fake_d))
    labels = tf.zeros_like(self._fake_d)
    d_loss_fake = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            logits=self._fake_d, labels=labels))
    self.d_loss = d_loss_true + d_loss_fake
    # Generator loss: push fake logits toward 1.
    self.g_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            logits=self._fake_d, labels=tf.ones_like(self._fake_d)))
    self.loss = [self.d_loss, self.g_loss]
def diff_rewards(states,
                 actions,
                 rewards,
                 next_states,
                 contexts,
                 state_indices=None,
                 goal_index=0):
  """Returns (next_states - goals) as a batched vector reward."""
  del states, rewards, actions  # Unused
  if state_indices is not None:
    next_states = index_states(next_states, state_indices)
  rewards = tf.to_float(next_states - contexts[goal_index])
  return rewards, tf.ones_like(rewards)
def _loss_op(self):
  with tf.name_scope("loss_op"):
    weights = tf.ones_like(self.y, name='weights')
    self.loss = sequence_loss(
        self.y_hat,
        self.y,
        weights=weights,
        loss_fn=which_loss(self._config.loss))
    if hasattr(self, '_reg'):
      reg = tf.reduce_sum(self._reg)
      self.loss += reg
      self._summary_dict.update({"loss": self.loss, "reg": reg})
    else:
      self._summary_dict.update({"loss": self.loss})
def tanh_similarity(states,
                    actions,
                    rewards,
                    next_states,
                    contexts,
                    mse_scale=1.0,
                    state_scales=1.0,
                    goal_scales=1.0,
                    summarize=False):
  """Returns the similarity between next_states and contexts using tanh and mse.

  Args:
    states: A [batch_size, num_state_dims] Tensor representing a batch of
      states.
    actions: A [batch_size, num_action_dims] Tensor representing a batch of
      actions.
    rewards: A [batch_size] Tensor representing a batch of rewards.
    next_states: A [batch_size, num_state_dims] Tensor representing a batch
      of next states.
    contexts: A list of [batch_size, num_context_dims] Tensor representing
      a batch of contexts.
    mse_scale: A float, to scale mse before tanh.
    state_scales: multiplicative scale for (next) states. A scalar or 1D
      tensor, must be broadcastable to number of state dimensions.
    goal_scales: multiplicative scale for contexts. A scalar or 1D tensor,
      must be broadcastable to number of goal dimensions.
    summarize: (boolean) enable summary ops.

  Returns:
    A new tf.float32 [batch_size] rewards Tensor, and tf.float32
      [batch_size] discounts tensor.
  """
  del states, actions, rewards  # Unused
  mse = tf.reduce_mean(
      tf.squared_difference(next_states * state_scales,
                            contexts[0] * goal_scales), -1)
  tanh = tf.tanh(mse_scale * mse)
  if summarize:
    with tf.name_scope('RewardFn/'):
      tf.summary.scalar('mean_mse', tf.reduce_mean(mse))
      tf.summary.histogram('mse', mse)
      tf.summary.scalar('mean_tanh', tf.reduce_mean(tanh))
      tf.summary.histogram('tanh', tanh)
  rewards = tf.to_float(1 - tanh)
  return rewards, tf.ones_like(rewards)
def _box_classification_loss_unbatched(inputs_1, outputs_1, is_intermediate,
                                       is_balanced, mine_hard_negatives,
                                       hard_negative_score_threshold):
  """Loss function for input and outputs of batch size 1."""
  valid_mask = _get_voxels_valid_mask(inputs_1=inputs_1)
  if is_intermediate:
    logits = outputs_1[standard_fields.DetectionResultFields
                       .intermediate_object_semantic_voxels]
  else:
    logits = outputs_1[
        standard_fields.DetectionResultFields.object_semantic_voxels]
  num_classes = logits.get_shape().as_list()[-1]
  if num_classes is None:
    raise ValueError('Number of classes is unknown.')
  logits = tf.boolean_mask(tf.reshape(logits, [-1, num_classes]), valid_mask)
  labels = tf.boolean_mask(
      tf.reshape(
          inputs_1[standard_fields.InputDataFields.object_class_voxels],
          [-1, 1]), valid_mask)
  if mine_hard_negatives or is_balanced:
    instances = tf.boolean_mask(
        tf.reshape(
            inputs_1[
                standard_fields.InputDataFields.object_instance_id_voxels],
            [-1]), valid_mask)
  params = {}
  if mine_hard_negatives:
    negative_scores = tf.reshape(tf.nn.softmax(logits)[:, 0], [-1])
    hard_negative_mask = tf.logical_and(
        tf.less(negative_scores, hard_negative_score_threshold),
        tf.equal(tf.reshape(labels, [-1]), 0))
    hard_negative_labels = tf.boolean_mask(labels, hard_negative_mask)
    hard_negative_logits = tf.boolean_mask(logits, hard_negative_mask)
    hard_negative_instances = tf.boolean_mask(
        tf.ones_like(instances) * (tf.reduce_max(instances) + 1),
        hard_negative_mask)
    logits = tf.concat([logits, hard_negative_logits], axis=0)
    instances = tf.concat([instances, hard_negative_instances], axis=0)
    labels = tf.concat([labels, hard_negative_labels], axis=0)
  if is_balanced:
    weights = loss_utils.get_balanced_loss_weights_multiclass(
        labels=tf.expand_dims(instances, axis=1))
    params['weights'] = weights
  return classification_loss_fn(logits=logits, labels=labels, **params)
def _body_fn(i, indices_range, indices):
  """Computes the indices of the i-th point feature in each segment."""
  indices_i = tf.math.unsorted_segment_max(
      data=indices_range, segment_ids=segment_ids, num_segments=num_segments)
  indices_i_positive_mask = tf.greater(indices_i, 0)
  indices_i_positive = tf.boolean_mask(indices_i, indices_i_positive_mask)
  boolean_mask = tf.scatter_nd(
      indices=tf.cast(
          tf.expand_dims(indices_i_positive - 1, axis=1), dtype=tf.int64),
      updates=tf.ones_like(indices_i_positive, dtype=tf.int32),
      shape=(n,))
  indices_range *= (1 - boolean_mask)
  indices_i *= tf.cast(indices_i_positive_mask, dtype=tf.int32)
  indices_i = tf.pad(
      tf.expand_dims(indices_i, axis=1),
      paddings=[[0, 0], [i, num_samples_per_voxel - i - 1]])
  indices += indices_i
  i = i + 1
  return i, indices_range, indices
def plain_rewards(states, actions, rewards, next_states, contexts):
  """Returns the given rewards.

  Args:
    states: A [batch_size, num_state_dims] Tensor representing a batch of
      states.
    actions: A [batch_size, num_action_dims] Tensor representing a batch of
      actions.
    rewards: A [batch_size] Tensor representing a batch of rewards.
    next_states: A [batch_size, num_state_dims] Tensor representing a batch
      of next states.
    contexts: A list of [batch_size, num_context_dims] Tensor representing
      a batch of contexts.

  Returns:
    A new tf.float32 [batch_size] rewards Tensor, and tf.float32
      [batch_size] discounts tensor.
  """
  del states, actions, next_states, contexts  # Unused
  return rewards, tf.ones_like(rewards)
def policy_output(state, available_actions, action_spec):

  def logits_output(num_categories, name):
    return Dense(
        num_categories, activation='linear', name=name + '_logits')(state)

  logits = [
      logits_output(np.prod(spec.sizes), name)
      for name, spec in action_spec.items()
  ]
  logits[0] = tf.where(
      available_actions > 0,
      logits[0],
      -1000 * tf.ones_like(logits[0]),
      name='mask_unavailable_functions')
  dists = {
      name: tfp.distributions.Categorical(
          logits=logits[spec.id], name=name + '_dist')
      for name, spec in action_spec.items()
  }
  return dists
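# Hypothetical standalone illustration (not from the original source) of the
# masking trick used above: setting the logits of unavailable actions to a
# large negative value drives their softmax probability to ~0, so a
# Categorical distribution over the masked logits never samples them.
import tensorflow as tf

logits = tf.constant([[2.0, 1.0, 0.5]])
available = tf.constant([[1.0, 0.0, 1.0]])
masked = tf.where(available > 0, logits, -1000 * tf.ones_like(logits))
print(tf.nn.softmax(masked))  # probability of the masked action is ~0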
def binary_indicator(states,
                     actions,
                     rewards,
                     next_states,
                     contexts,
                     termination_epsilon=1e-4,
                     offset=0,
                     epsilon=1e-10,
                     state_indices=None,
                     summarize=False):
  """Returns 0/1 by checking if next_states and contexts overlap.

  Args:
    states: A [batch_size, num_state_dims] Tensor representing a batch of
      states.
    actions: A [batch_size, num_action_dims] Tensor representing a batch of
      actions.
    rewards: A [batch_size] Tensor representing a batch of rewards.
    next_states: A [batch_size, num_state_dims] Tensor representing a batch
      of next states.
    contexts: A list of [batch_size, num_context_dims] Tensor representing
      a batch of contexts.
    termination_epsilon: terminate if dist is less than this quantity.
    offset: Offset the rewards.
    epsilon: small offset to ensure non-negative/zero distance.
    state_indices: (a list of integers) Indices of state dimensions to
      select. If None, all dimensions are used.
    summarize: (boolean) enable summary ops.

  Returns:
    A new tf.float32 [batch_size] rewards Tensor, and tf.float32
      [batch_size] discounts tensor.
  """
  del states, actions  # unused args
  next_states = index_states(next_states, state_indices)
  dist = tf.reduce_sum(tf.squared_difference(next_states, contexts[0]), -1)
  dist = tf.sqrt(dist + epsilon)
  discounts = dist > termination_epsilon
  rewards = tf.logical_not(discounts)
  rewards = tf.to_float(rewards) + offset
  return tf.to_float(rewards), tf.ones_like(
      tf.to_float(discounts))  # or: tf.to_float(discounts)
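# Hypothetical usage sketch (not from the original source), assuming a
# TF1-style runtime (tf.to_float, tf.squared_difference) and that
# index_states(x, None) returns x unchanged: the indicator reward fires only
# when a next_state lies within termination_epsilon of its goal context.
import tensorflow as tf

next_states = tf.constant([[0.0, 0.0], [1.0, 1.0]])
goals = [tf.constant([[0.0, 0.0], [0.0, 0.0]])]
r, d = binary_indicator(None, None, None, next_states, goals)
# r ~= [1.0, 0.0]: only the first next_state overlaps its goal.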
def set_element(v, i, x):
  """Returns a copy of 1-D tensor `v` with element `i` set to `x`."""
  mask = tf.one_hot(i, tf.shape(input=v)[0])
  v_new = tf.ones_like(v) * x
  return tf.where(tf.equal(mask, 1), v_new, v)
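# Hypothetical usage sketch (not from the original source): functionally
# replace one element of a 1-D tensor without in-place mutation.
import tensorflow as tf

v = tf.constant([1.0, 2.0, 3.0])
print(set_element(v, 1, 9.0))  # [1.0, 9.0, 3.0]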
def compute_module_criticality(
    objective_fn,
    module_variables_init,
    module_variables_final,
    num_samples_per_iteration=10,
    alpha_grid_size=10,
    sigma_grid_size=10,
    sigma_ratio=1.0,
    loss_threshold_condition=relative_error_condition,
    normalize_error=False,
):
  """Compute the criticality of a module parameterized by `module_variables`.

  Args:
    objective_fn: A callable that takes in an iterable of the
      module-specific variables and produces the value of the objective
      function.
    module_variables_init: A list of tf.Tensors; the variables of the module
      at initialization.
    module_variables_final: A list of tf.Tensors; the variables of the
      module at convergence.
    num_samples_per_iteration: Number of perturbations to sample each
      iteration.
    alpha_grid_size: The number of values to test for alpha, the
      interpolation coefficient.
    sigma_grid_size: The number of values to test for sigma, the standard
      deviation of the perturbation.
    sigma_ratio: Positive scalar multiplier k for values of sigma, to
      enforce that the tested values of sigma lie in [k * 1e-16, k]; the
      default is 1.0, implying that the tested values of sigma lie in the
      interval [1e-16, 1].
    loss_threshold_condition: A callable that takes in a reference objective
      value and a candidate objective value and produces a thresholding
      decision.
    normalize_error: Whether to normalize the error that is minimized over
      in the definition of criticality by the Frobenius norm of the distance
      between initial and final parameters.

  Returns:
    A `collections.NamedTuple` that contains the results of the criticality
    analysis.
  """
  initial_objective_value = objective_fn(module_variables_init)
  final_objective_value = objective_fn(module_variables_final)

  # Test a 2D grid of alpha and sigma values.
  float_zero = tf.cast(0, tf.float32)
  alphas, sigmas = tf.meshgrid(
      tf.linspace(float_zero, 1, alpha_grid_size + 1),
      tf.linspace(float_zero + 1e-16, 1, sigma_grid_size + 1) * sigma_ratio,
  )
  alphas, sigmas = tf.reshape(alphas, [-1]), tf.reshape(sigmas, [-1])

  def _evaluate_alpha_sigma(alpha_sigma):
    alpha, sigma = alpha_sigma
    return _interpolate_and_perturb(
        alpha=alpha,
        sigma=sigma,
        params_init=module_variables_init,
        params_final=module_variables_final,
        objective_fn=objective_fn,
        loss_threshold_condition=functools.partial(
            loss_threshold_condition, reference_error=final_objective_value),
        normalize_error=normalize_error,
        num_samples_per_iteration=num_samples_per_iteration,
    )

  (threshold_conditions, interpolated_and_perturbed_losses,
   interpolated_and_perturbed_norms) = tf.map_fn(
       _evaluate_alpha_sigma,
       elems=(alphas, sigmas),
       dtype=(tf.bool, tf.float32, tf.float32),
   )

  masked_interpolated_and_perturbed_norms = tf.where(
      threshold_conditions, interpolated_and_perturbed_norms,
      tf.ones_like(interpolated_and_perturbed_norms) * np.inf)
  idx_min = tf.math.argmin(masked_interpolated_and_perturbed_norms)
  (loss_final, norm_final, alpha_final,
   sigma_final) = (interpolated_and_perturbed_losses[idx_min],
                   interpolated_and_perturbed_norms[idx_min],
                   alphas[idx_min], sigmas[idx_min])

  return ModuleCriticalityAnalysis(
      criticality_score=norm_final,
      alpha=alpha_final,
      sigma=sigma_final,
      loss_value=loss_final,
      num_samples_per_iteration=num_samples_per_iteration,
      alpha_grid_size=alpha_grid_size,
      sigma_grid_size=sigma_grid_size,
      sigma_ratio=sigma_ratio,
      initial_objective_value=initial_objective_value,
      final_objective_value=final_objective_value,
  )
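# Hypothetical usage sketch (not from the original source), assuming the
# surrounding module's helpers (_interpolate_and_perturb,
# relative_error_condition, ModuleCriticalityAnalysis) are importable: score
# the criticality of a single weight vector under a toy quadratic objective
# that is minimized at the final parameters.
import tensorflow as tf

w_init = [tf.zeros([4])]
w_final = [tf.ones([4])]

def objective_fn(variables):
  # Toy loss, zero at the converged weights.
  return tf.reduce_sum(tf.square(variables[0] - w_final[0]))

analysis = compute_module_criticality(objective_fn, w_init, w_final)
print(analysis.criticality_score)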
def on_predict_batch_end(self, batch, logs=None):
  """Write mesh summaries of semantics groundtruth and prediction point
  clouds at the end of each validation batch."""
  inputs = logs['inputs']
  outputs = logs['outputs']
  if self._metric:
    for metric in self._metric:
      metric.update_state(inputs=inputs, outputs=outputs)
  if batch <= self.num_qualitative_examples:
    # Point cloud visualization.
    vertices = tf.reshape(
        inputs[standard_fields.InputDataFields.point_positions], [-1, 3])
    num_valid_points = tf.squeeze(
        inputs[standard_fields.InputDataFields.num_valid_points])
    logits = outputs[
        standard_fields.DetectionResultFields.object_semantic_points]
    num_classes = logits.get_shape().as_list()[-1]
    logits = tf.reshape(logits, [-1, num_classes])
    gt_semantic_class = tf.reshape(
        inputs[standard_fields.InputDataFields.object_class_points], [-1])
    vertices = vertices[:num_valid_points, :]
    logits = logits[:num_valid_points, :]
    gt_semantic_class = gt_semantic_class[:num_valid_points]
    max_num_points = tf.math.minimum(self.max_num_points_qualitative,
                                     num_valid_points)
    sample_indices = tf.random.shuffle(
        tf.range(num_valid_points))[:max_num_points]
    vertices = tf.gather(vertices, sample_indices)
    logits = tf.gather(logits, sample_indices)
    gt_semantic_class = tf.gather(gt_semantic_class, sample_indices)
    semantic_class = tf.math.argmax(logits, axis=1)
    pred_colors = tf.gather(self._pascal_color_map, semantic_class, axis=0)
    gt_colors = tf.gather(self._pascal_color_map, gt_semantic_class, axis=0)
    if standard_fields.InputDataFields.point_colors in inputs:
      point_colors = (tf.reshape(
          inputs[standard_fields.InputDataFields.point_colors],
          [-1, 3]) + 1.0) * 255.0 / 2.0
      point_colors = point_colors[:num_valid_points, :]
      point_colors = tf.gather(point_colors, sample_indices)
      point_colors = tf.math.minimum(point_colors, 255.0)
      point_colors = tf.math.maximum(point_colors, 0.0)
      point_colors = tf.cast(point_colors, dtype=tf.uint8)
    else:
      point_colors = tf.ones_like(vertices, dtype=tf.uint8) * 128
    # Add points and colors for predicted objects.
    if standard_fields.DetectionResultFields.objects_length in outputs:
      box_corners = box_utils.get_box_corners_3d(
          boxes_length=outputs[
              standard_fields.DetectionResultFields.objects_length],
          boxes_height=outputs[
              standard_fields.DetectionResultFields.objects_height],
          boxes_width=outputs[
              standard_fields.DetectionResultFields.objects_width],
          boxes_rotation_matrix=outputs[
              standard_fields.DetectionResultFields.objects_rotation_matrix],
          boxes_center=outputs[
              standard_fields.DetectionResultFields.objects_center])
      box_points = box_utils.get_box_as_dotted_lines(box_corners)
      objects_class = tf.reshape(
          outputs[standard_fields.DetectionResultFields.objects_class], [-1])
      box_colors = tf.gather(self._pascal_color_map, objects_class, axis=0)
      box_colors = tf.repeat(
          box_colors[:, tf.newaxis, :], box_points.shape[1], axis=1)
      box_points = tf.reshape(box_points, [-1, 3])
      box_colors = tf.reshape(box_colors, [-1, 3])
      pred_vertices = tf.concat([vertices, box_points], axis=0)
      pred_colors = tf.concat([pred_colors, box_colors], axis=0)
    else:
      pred_vertices = vertices
    # Add points and colors for groundtruth objects.
    if standard_fields.InputDataFields.objects_length in inputs:
      box_corners = box_utils.get_box_corners_3d(
          boxes_length=tf.reshape(
              inputs[standard_fields.InputDataFields.objects_length],
              [-1, 1]),
          boxes_height=tf.reshape(
              inputs[standard_fields.InputDataFields.objects_height],
              [-1, 1]),
          boxes_width=tf.reshape(
              inputs[standard_fields.InputDataFields.objects_width],
              [-1, 1]),
          boxes_rotation_matrix=tf.reshape(
              inputs[standard_fields.InputDataFields.objects_rotation_matrix],
              [-1, 3, 3]),
          boxes_center=tf.reshape(
              inputs[standard_fields.InputDataFields.objects_center],
              [-1, 3]))
      box_points = box_utils.get_box_as_dotted_lines(box_corners)
      objects_class = tf.reshape(
          inputs[standard_fields.InputDataFields.objects_class], [-1])
      box_colors = tf.gather(self._pascal_color_map, objects_class, axis=0)
      box_colors = tf.repeat(
          box_colors[:, tf.newaxis, :], box_points.shape[1], axis=1)
      box_points = tf.reshape(box_points, [-1, 3])
      box_colors = tf.reshape(box_colors, [-1, 3])
      gt_vertices = tf.concat([vertices, box_points], axis=0)
      gt_colors = tf.concat([gt_colors, box_colors], axis=0)
    else:
      gt_vertices = vertices
    if batch == 1:
      logging.info('writing point cloud (shape %s) to summary.',
                   gt_vertices.shape)
    if standard_fields.InputDataFields.camera_image_name in inputs:
      camera_image_name = str(
          inputs[standard_fields.InputDataFields.camera_image_name]
          .numpy()[0])
    else:
      camera_image_name = str(batch)
    logging.info(camera_image_name)
    with self._val_mesh_writer.as_default():
      mesh_summary.mesh(
          name=(self.split + '_points/' + camera_image_name),
          vertices=tf.expand_dims(vertices, axis=0),
          faces=None,
          colors=tf.expand_dims(point_colors, axis=0),
          config_dict=self._mesh_config_dict,
          step=self._val_step,
      )
      mesh_summary.mesh(
          name=(self.split + '_predictions/' + camera_image_name),
          vertices=tf.expand_dims(pred_vertices, axis=0),
          faces=None,
          colors=tf.expand_dims(pred_colors, axis=0),
          config_dict=self._mesh_config_dict,
          step=self._val_step,
      )
      mesh_summary.mesh(
          name=(self.split + '_ground_truth/' + camera_image_name),
          vertices=tf.expand_dims(gt_vertices, axis=0),
          faces=None,
          colors=tf.expand_dims(gt_colors, axis=0),
          config_dict=self._mesh_config_dict,
          step=self._val_step,
      )
    if batch == self.num_qualitative_examples:
      self._val_mesh_writer.flush()
def cosine_similarity(states,
                      starting_states,
                      actions,
                      rewards,
                      next_states,
                      contexts,
                      state_scales=1.0,
                      goal_scales=1.0,
                      reward_scales=1.0,
                      normalize_states=True,
                      normalize_goals=True,
                      weight_index=None,
                      weight_vector=None,
                      summarize=False,
                      state_indices=None,
                      goal_indices=None,
                      offset=0.0):
  """Returns the cosine similarity between next_states - states and contexts.

  Args:
    states: A [batch_size, num_state_dims] Tensor representing a batch of
      states.
    starting_states: A [batch_size, num_state_dims] Tensor representing a
      batch of starting states (unused).
    actions: A [batch_size, num_action_dims] Tensor representing a batch of
      actions.
    rewards: A [batch_size] Tensor representing a batch of rewards.
    next_states: A [batch_size, num_state_dims] Tensor representing a batch
      of next states.
    contexts: A list of [batch_size, num_context_dims] Tensor representing
      a batch of contexts.
    state_scales: multiplicative scale for (next) states. A scalar or 1D
      tensor, must be broadcastable to number of state dimensions.
    goal_scales: multiplicative scale for goals. A scalar or 1D tensor, must
      be broadcastable to number of goal dimensions.
    reward_scales: multiplicative scale for rewards. A scalar or 1D tensor,
      must be broadcastable to number of reward dimensions.
    normalize_states: (boolean) l2-normalize the direction vector.
    normalize_goals: (boolean) l2-normalize the goal vector.
    weight_index: (integer) The context list index that specifies weight.
    weight_vector: (a number or a list or Numpy array) The weighting vector,
      broadcastable to `next_states`.
    summarize: (boolean) enable summary ops.
    state_indices: (a list of integers) list of state indices to select.
    goal_indices: (a list of integers) list of goal indices to select.
    offset: (float) additive constant for the returned reward.

  Returns:
    A new tf.float32 [batch_size] rewards Tensor, and tf.float32
      [batch_size] discounts tensor.
  """
  del actions, rewards  # Unused
  stats = {}
  record_tensor(next_states, state_indices, stats, 'next_states')
  states = index_states(states, state_indices)
  next_states = index_states(next_states, state_indices)
  goals = index_states(contexts[0], goal_indices)
  if weight_vector is not None:
    goals *= tf.convert_to_tensor(weight_vector, dtype=next_states.dtype)
  if weight_index is not None:
    weights = tf.abs(index_states(contexts[0], weight_index))
    goals *= weights
  direction_vec = next_states - states
  if normalize_states:
    direction_vec = tf.nn.l2_normalize(direction_vec, -1)
  goal_vec = goals
  if normalize_goals:
    goal_vec = tf.nn.l2_normalize(goal_vec, -1)
  similarity = tf.reduce_sum(goal_vec * direction_vec, -1)
  discounts = tf.ones_like(similarity)
  return offset + tf.to_float(similarity), tf.to_float(discounts)
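# Hypothetical usage sketch (not from the original source), assuming the
# module's helpers (record_tensor, index_states with None indices acting as
# identity) and a TF1-style runtime (tf.to_float): the reward is the cosine
# between the realized displacement (next_states - states) and the goal
# direction, so moving exactly toward the goal yields +1.
import tensorflow as tf

states = tf.zeros([1, 2])
next_states = tf.constant([[1.0, 0.0]])
goals = [tf.constant([[2.0, 0.0]])]
r, d = cosine_similarity(states, None, None, None, next_states, goals)
# r ~= [1.0], d == [1.0]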
def preprocess(inputs,
               output_keys=None,
               is_training=False,
               using_sequence_dataset=False,
               num_frame_to_load=1,
               transform_points_fn=None,
               image_preprocess_fn_dic=None,
               images_points_correspondence_fn=None,
               compute_semantic_labels_fn=None,
               compute_motion_labels_fn=None,
               view_names=(),
               points_key='points',
               colors_key='colors',
               normals_key='normals',
               intensities_key='intensities',
               elongations_key='elongations',
               semantic_labels_key='semantic_labels',
               motion_labels_key='motion_labels',
               spin_coords_key=None,
               points_in_image_frame_key=None,
               num_points_to_randomly_sample=None,
               x_min_degree_rotation=None,
               x_max_degree_rotation=None,
               y_min_degree_rotation=None,
               y_max_degree_rotation=None,
               z_min_degree_rotation=None,
               z_max_degree_rotation=None,
               points_pad_or_clip_size=None,
               voxels_pad_or_clip_size=None,
               voxel_grid_cell_size=(0.1, 0.1, 0.1),
               num_offset_bins_x=4,
               num_offset_bins_y=4,
               num_offset_bins_z=4,
               point_feature_keys=('point_offsets',),
               point_to_voxel_segment_func=tf.math.unsorted_segment_mean,
               x_random_crop_size=None,
               y_random_crop_size=None,
               min_scale_ratio=None,
               max_scale_ratio=None,
               semantic_labels_offset=0,
               ignore_labels=(),
               remove_unlabeled_images_and_points=False,
               labeled_view_name=None,
               only_keep_first_return_lidar_points=False):
  """Preprocesses a dictionary of `Tensor` inputs.

  If is_training=True, it will randomly rotate the points around the z axis,
  and will randomly flip the points with respect to x and/or y axis.

  Note that the preprocessor function does not correct normal vectors if
  they exist in the inputs.
  Note that the preprocessing affects all values of `inputs` that are
  `Tensors`.

  Args:
    inputs: A dictionary of inputs. Each value must be a `Tensor`.
    output_keys: Either None, or a list of strings containing the keys in
      the dictionary that is returned by the preprocess function.
    is_training: Whether we're training or testing.
    using_sequence_dataset: if true, the inputs will contain scene and
      multiple frames data.
    num_frame_to_load: If greater than 1, load multiframe point cloud point
      positions and its correspondence.
    transform_points_fn: Fn to transform other frames to a specific frame's
      coordinate.
    image_preprocess_fn_dic: Image preprocessing function. Maps view names
      to their image preprocessing functions. Set it to None, if there are
      no images to preprocess or you are not interested in preprocessing
      images.
    images_points_correspondence_fn: The function that computes
      correspondence between images and points.
    compute_semantic_labels_fn: If not None, semantic labels will be
      computed using this function.
    compute_motion_labels_fn: If not None, motion labels will be computed
      using this function.
    view_names: Names corresponding to 2d views of the scene.
    points_key: The key used for `points` in the inputs.
    colors_key: The key used for `colors` in the inputs.
    normals_key: The key used for 'normals' in the inputs.
    intensities_key: The key used for 'intensities' in the inputs.
    elongations_key: The key used for 'elongations' in the inputs.
    semantic_labels_key: The key used for 'semantic_labels' in the inputs.
    motion_labels_key: The key used for 'motion_labels' in the inputs.
    spin_coords_key: The key used for 'spin_coords' in the inputs. In Waymo
      data, spin_coords is a [num_points, 3] tensor that contains
      scan_index, shot_index, return_index. In Waymo data, return_index of
      the first return points is 0.
    points_in_image_frame_key: A string that identifies the tensor that
      contains the points_in_image_frame tensor. If None, it won't be used.
    num_points_to_randomly_sample: Number of points to randomly sample. If
      None, it will keep the original points and does not perform sampling.
    x_min_degree_rotation: Min degree of rotation around the x axis.
    x_max_degree_rotation: Max degree of rotation around the x axis.
    y_min_degree_rotation: Min degree of rotation around the y axis.
    y_max_degree_rotation: Max degree of rotation around the y axis.
    z_min_degree_rotation: Min degree of rotation around the z axis.
    z_max_degree_rotation: Max degree of rotation around the z axis.
    points_pad_or_clip_size: Number of target points to pad or clip to. If
      None, it will not perform the point padding.
    voxels_pad_or_clip_size: Number of target voxels to pad or clip to. If
      None, it will not perform the voxel padding.
    voxel_grid_cell_size: A three dimensional tuple determining the voxel
      grid size.
    num_offset_bins_x: Number of bins for point offsets in x direction.
    num_offset_bins_y: Number of bins for point offsets in y direction.
    num_offset_bins_z: Number of bins for point offsets in z direction.
    point_feature_keys: The keys used to form the voxel features.
    point_to_voxel_segment_func: The function used to aggregate the features
      of the points that fall in the same voxel.
    x_random_crop_size: Size of the random crop in x dimension. If None,
      random crop will not take place on x dimension.
    y_random_crop_size: Size of the random crop in y dimension. If None,
      random crop will not take place on y dimension.
    min_scale_ratio: Minimum scale ratio. Used for scaling point cloud.
    max_scale_ratio: Maximum scale ratio. Used for scaling point cloud.
    semantic_labels_offset: An integer offset that will be added to labels.
    ignore_labels: A tuple containing labels that should be ignored when
      computing the loss and metrics.
    remove_unlabeled_images_and_points: If True, removes the images that are
      not labeled and also removes the points that are associated with those
      images.
    labeled_view_name: The name of the view that is labeled, otherwise None.
    only_keep_first_return_lidar_points: If True, we only keep the first
      return lidar points.

  Returns:
    The mean subtracted points with an optional rotation applied.

  Raises:
    ValueError: if `inputs` doesn't contain the points_key.
    ValueError: if `points_in_image_frame` does not have rank 3.
  """
  inputs = dict(inputs)
  if using_sequence_dataset:
    all_frame_inputs = inputs
    scene = all_frame_inputs['scene']
    frame1 = all_frame_inputs['frame1']
    frame_start_index = all_frame_inputs['frame_start_index']
    # So that the following processing code can be unchanged.
    inputs = dict(all_frame_inputs['frame0'])

  # Initializing empty dictionary for mesh, image, indices_2d and non tensor
  # inputs.
  non_tensor_inputs = {}
  view_image_inputs = {}
  view_indices_2d_inputs = {}
  mesh_inputs = {}

  if image_preprocess_fn_dic is None:
    image_preprocess_fn_dic = {}

  # Convert all float64 to float32 and all int64 to int32.
  for key in sorted(inputs):
    if isinstance(inputs[key], tf.Tensor):
      if inputs[key].dtype == tf.float64:
        inputs[key] = tf.cast(inputs[key], dtype=tf.float32)
      if inputs[key].dtype == tf.int64:
        inputs[key] = tf.cast(inputs[key], dtype=tf.int32)

  if points_key in inputs:
    inputs[standard_fields.InputDataFields.point_positions] = inputs[
        points_key]
  if colors_key is not None and colors_key in inputs:
    inputs[standard_fields.InputDataFields.point_colors] = inputs[colors_key]
  if normals_key is not None and normals_key in inputs:
    inputs[standard_fields.InputDataFields.point_normals] = inputs[
        normals_key]
  if intensities_key is not None and intensities_key in inputs:
    inputs[standard_fields.InputDataFields.point_intensities] = inputs[
        intensities_key]
  if elongations_key is not None and elongations_key in inputs:
    inputs[standard_fields.InputDataFields.point_elongations] = inputs[
        elongations_key]
  if semantic_labels_key is not None and semantic_labels_key in inputs:
    inputs[standard_fields.InputDataFields.object_class_points] = inputs[
        semantic_labels_key]
  if motion_labels_key is not None and motion_labels_key in inputs:
    inputs[standard_fields.InputDataFields.object_flow_points] = inputs[
        motion_labels_key]
  if spin_coords_key is not None and spin_coords_key in inputs:
    inputs[standard_fields.InputDataFields.point_spin_coordinates] = inputs[
        spin_coords_key]

  # Acquire point / image correspondences.
  if images_points_correspondence_fn is not None:
    fn_outputs = images_points_correspondence_fn(inputs)
    if 'points_position' in fn_outputs:
      inputs[standard_fields.InputDataFields.point_positions] = fn_outputs[
          'points_position']
    if 'points_intensity' in fn_outputs and intensities_key is not None:
      inputs[standard_fields.InputDataFields.point_intensities] = fn_outputs[
          'points_intensity']
    if 'points_elongation' in fn_outputs and elongations_key is not None:
      inputs[standard_fields.InputDataFields.point_elongations] = fn_outputs[
          'points_elongation']
    if 'points_label' in fn_outputs and semantic_labels_key is not None:
      inputs[standard_fields.InputDataFields
             .object_class_points] = fn_outputs['points_label']
    if 'view_images' in fn_outputs:
      for key in sorted(fn_outputs['view_images']):
        if len(fn_outputs['view_images'][key].shape) != 4:
          raise ValueError('%s image should have rank 4.' % key)
      view_image_inputs = fn_outputs['view_images']
    if 'view_indices_2d' in fn_outputs:
      for key in sorted(fn_outputs['view_indices_2d']):
        if len(fn_outputs['view_indices_2d'][key].shape) != 3:
          raise ValueError('%s indices_2d should have rank 3.' % key)
      view_indices_2d_inputs = fn_outputs['view_indices_2d']
  else:
    if points_in_image_frame_key is not None:
      inputs['rgb_view/features'] = inputs['image']
      inputs['rgb_view/indices_2d'] = inputs[points_in_image_frame_key]
      if len(inputs['rgb_view/indices_2d'].shape) != 3:
        raise ValueError('`points_in_image_frame` should have rank 3.')

  frame0 = inputs.copy()
  if num_frame_to_load > 1:
    point_positions_list = [
        frame0[standard_fields.InputDataFields.point_positions]
    ]
    if view_indices_2d_inputs:
      view_indices_2d_list = [view_indices_2d_inputs[view_names[0]]]
    frame_source_list = [
        tf.zeros([
            tf.shape(
                frame0[standard_fields.InputDataFields.point_positions])[0]
        ], tf.int32)
    ]
    for i in range(1, num_frame_to_load):
      target_frame_key = 'frame' + str(i)
      if images_points_correspondence_fn is not None:
        frame_i = images_points_correspondence_fn(
            all_frame_inputs[target_frame_key])
      else:
        raise ValueError(
            'images_points_correspondence_fn is needed for loading '
            'multi-frame pointclouds.')
      transformed_point_positions = transform_points_fn(
          scene, frame_i['points_position'], frame_start_index,
          i + frame_start_index)
      point_positions_list.append(transformed_point_positions)
      if view_indices_2d_inputs:
        view_indices_2d_list.append(
            frame_i['view_indices_2d'][view_names[0]])
      frame_source_list.append(
          tf.ones([tf.shape(transformed_point_positions)[0]], tf.int32) * i)

    # Add multi-frame info to override inputs and view_indices_2d_inputs.
    inputs[standard_fields.InputDataFields
           .point_frame_index] = tf.expand_dims(
               tf.concat(frame_source_list, axis=0), axis=1)
    inputs[standard_fields.InputDataFields.point_positions] = tf.concat(
        point_positions_list, axis=0)
    if view_indices_2d_inputs:
      view_indices_2d_inputs[view_names[0]] = tf.concat(
          view_indices_2d_list, axis=1)

  # Validate inputs.
  if standard_fields.InputDataFields.point_positions not in inputs:
    raise ValueError('`inputs` must contain a point_positions')
  if inputs[
      standard_fields.InputDataFields.point_positions].shape.ndims != 2:
    raise ValueError('points must be of rank 2.')
  if inputs[standard_fields.InputDataFields.point_positions].shape[1] != 3:
    raise ValueError('point should be 3 dimensional.')

  # Remove normal nans.
  if standard_fields.InputDataFields.point_normals in inputs:
    inputs[standard_fields.InputDataFields.point_normals] = tf.where(
        tf.math.is_nan(
            inputs[standard_fields.InputDataFields.point_normals]),
        tf.zeros_like(
            inputs[standard_fields.InputDataFields.point_normals]),
        inputs[standard_fields.InputDataFields.point_normals])

  # Compute semantic labels if compute_semantic_labels_fn is not None.
  # An example is when the ground-truth contains 3d object boxes and not per
  # point labels. This would be a function that infers point labels from
  # boxes.
  if compute_semantic_labels_fn is not None:
    inputs[standard_fields.InputDataFields
           .object_class_points] = compute_semantic_labels_fn(
               inputs=frame0,
               points_key=standard_fields.InputDataFields.point_positions)
  if compute_motion_labels_fn is not None:
    inputs[standard_fields.InputDataFields
           .object_flow_points] = compute_motion_labels_fn(
               scene=scene,
               frame0=frame0,
               frame1=frame1,
               frame_start_index=frame_start_index,
               points_key=standard_fields.InputDataFields.point_positions)

  # Splitting inputs to {view_image_inputs,
  #                      view_indices_2d_inputs,
  #                      mesh_inputs,
  #                      non_tensor_inputs}
  mesh_keys = []
  for key in [
      standard_fields.InputDataFields.point_positions,
      standard_fields.InputDataFields.point_colors,
      standard_fields.InputDataFields.point_normals,
      standard_fields.InputDataFields.point_intensities,
      standard_fields.InputDataFields.point_elongations,
      standard_fields.InputDataFields.object_class_points,
      standard_fields.InputDataFields.point_spin_coordinates,
      standard_fields.InputDataFields.object_flow_points,
      standard_fields.InputDataFields.point_frame_index,
  ]:
    if key is not None and key in inputs:
      mesh_keys.append(key)
  view_image_names = [('%s/features' % key) for key in view_names]
  view_indices_2d_names = [('%s/indices_2d' % key) for key in view_names]

  # Additional key collecting.
  for k, v in six.iteritems(inputs):
    if k in view_image_names:
      view_image_inputs[k] = v
    elif k in view_indices_2d_names:
      view_indices_2d_inputs[k] = v
    elif k in mesh_keys:
      if num_frame_to_load > 1:
        pad_size = tf.shape(
            inputs[standard_fields.InputDataFields.point_positions]
        )[0] - tf.shape(v)[0]
        if k == standard_fields.InputDataFields.object_class_points:
          pad_value = -1
        else:
          pad_value = 0
        v = tf.pad(v, [[0, pad_size], [0, 0]], constant_values=pad_value)
      mesh_inputs[k] = v
    else:
      non_tensor_inputs[k] = v

  # Remove points that are not in the lidar first return (optional).
  if only_keep_first_return_lidar_points:
    _remove_second_return_lidar_points(
        mesh_inputs=mesh_inputs,
        view_indices_2d_inputs=view_indices_2d_inputs)

  # Randomly sample points.
  preprocessor_utils.randomly_sample_points(
      mesh_inputs=mesh_inputs,
      view_indices_2d_inputs=view_indices_2d_inputs,
      target_num_points=num_points_to_randomly_sample)

  # Add weights if it does not exist in inputs. The weight of the points
  # with label in `ignore_labels` is set to 0. This helps the loss and
  # metrics to ignore those labels.
  use_weights = (
      standard_fields.InputDataFields.object_class_points in mesh_inputs or
      standard_fields.InputDataFields.object_flow_points in mesh_inputs)
  if use_weights:
    if num_frame_to_load > 1:
      num_valid_points_frame0 = tf.shape(
          frame0[standard_fields.InputDataFields.point_positions])[0]
      num_additional_frame_points = tf.shape(
          mesh_inputs[standard_fields.InputDataFields.object_class_points]
      )[0] - num_valid_points_frame0
      weights = tf.concat([
          tf.ones([num_valid_points_frame0, 1], tf.float32),
          tf.zeros([num_additional_frame_points, 1], tf.float32)
      ], axis=0)
    else:
      weights = tf.ones_like(
          mesh_inputs[standard_fields.InputDataFields.object_class_points],
          dtype=tf.float32)

  if standard_fields.InputDataFields.object_class_points in mesh_inputs:
    mesh_inputs[standard_fields.InputDataFields
                .object_class_points] = tf.cast(
                    mesh_inputs[
                        standard_fields.InputDataFields.object_class_points],
                    dtype=tf.int32)
    for ignore_label in ignore_labels:
      weights *= tf.cast(
          tf.not_equal(
              mesh_inputs[
                  standard_fields.InputDataFields.object_class_points],
              ignore_label),
          dtype=tf.float32)
    mesh_inputs[standard_fields.InputDataFields.point_loss_weights] = weights
    mesh_inputs[standard_fields.InputDataFields
                .object_class_points] += semantic_labels_offset

  # We normalize the intensities and elongations to be in a smaller range.
  if standard_fields.InputDataFields.point_intensities in mesh_inputs:
    mesh_inputs[standard_fields.InputDataFields
                .point_intensities] = change_intensity_range(
                    intensities=mesh_inputs[
                        standard_fields.InputDataFields.point_intensities])
  if standard_fields.InputDataFields.point_elongations in mesh_inputs:
    mesh_inputs[standard_fields.InputDataFields.point_elongations] = (
        tf.cast(
            mesh_inputs[standard_fields.InputDataFields.point_elongations],
            dtype=tf.float32) * 2.0 / 255.0) - 1.0

  # Randomly scale the points.
  if min_scale_ratio is not None and max_scale_ratio is not None:
    scale_ratio = tf.random.uniform([],
                                    minval=min_scale_ratio,
                                    maxval=max_scale_ratio,
                                    dtype=tf.float32)
    mesh_inputs[
        standard_fields.InputDataFields.point_positions] *= scale_ratio
    if standard_fields.InputDataFields.object_flow_points in mesh_inputs:
      mesh_inputs[
          standard_fields.InputDataFields.object_flow_points] *= scale_ratio

  # Randomly crop the points.
  randomly_crop_points(
      mesh_inputs=mesh_inputs,
      view_indices_2d_inputs=view_indices_2d_inputs,
      x_random_crop_size=x_random_crop_size,
      y_random_crop_size=y_random_crop_size)

  # If training, pick the best labeled image and points that project to it.
  # In many datasets, only one image is labeled anyways.
  if remove_unlabeled_images_and_points:
    pick_labeled_image(
        mesh_inputs=mesh_inputs,
        view_image_inputs=view_image_inputs,
        view_indices_2d_inputs=view_indices_2d_inputs,
        view_name=labeled_view_name)

  # Process images.
  preprocessor_utils.preprocess_images(
      view_image_inputs=view_image_inputs,
      view_indices_2d_inputs=view_indices_2d_inputs,
      image_preprocess_fn_dic=image_preprocess_fn_dic,
      is_training=is_training)

  # Record the original points.
  original_points = mesh_inputs[
      standard_fields.InputDataFields.point_positions]
  if standard_fields.InputDataFields.point_colors in mesh_inputs:
    original_colors = mesh_inputs[
        standard_fields.InputDataFields.point_colors]
  if standard_fields.InputDataFields.point_normals in mesh_inputs:
    original_normals = mesh_inputs[
        standard_fields.InputDataFields.point_normals]

  # Update feature visibility count.
  if 'feature_visibility_count' in mesh_inputs:
    mesh_inputs['feature_visibility_count'] = tf.maximum(
        mesh_inputs['feature_visibility_count'], 1)
    mesh_inputs['features'] /= tf.cast(
        mesh_inputs['feature_visibility_count'], dtype=tf.float32)

  # Subtract mean from points.
  mean_points = tf.reduce_mean(
      mesh_inputs[standard_fields.InputDataFields.point_positions], axis=0)
  mesh_inputs[
      standard_fields.InputDataFields.point_positions] -= tf.expand_dims(
          mean_points, axis=0)

  # Rotate points randomly.
  if standard_fields.InputDataFields.point_normals in mesh_inputs:
    normals = mesh_inputs[standard_fields.InputDataFields.point_normals]
  else:
    normals = None
  if standard_fields.InputDataFields.object_flow_points in mesh_inputs:
    motions = mesh_inputs[
        standard_fields.InputDataFields.object_flow_points]
  else:
    motions = None
  (mesh_inputs[standard_fields.InputDataFields.point_positions],
   rotated_normals, rotated_motions) = rotate_randomly(
       points=mesh_inputs[standard_fields.InputDataFields.point_positions],
       normals=normals,
       motions=motions,
       x_min_degree_rotation=x_min_degree_rotation,
       x_max_degree_rotation=x_max_degree_rotation,
       y_min_degree_rotation=y_min_degree_rotation,
       y_max_degree_rotation=y_max_degree_rotation,
       z_min_degree_rotation=z_min_degree_rotation,
       z_max_degree_rotation=z_max_degree_rotation)

  # Random flipping in x and y directions.
  (mesh_inputs[standard_fields.InputDataFields.point_positions],
   flipped_normals,
   flipped_motions) = flip_randomly_points_and_normals_motions(
       points=mesh_inputs[standard_fields.InputDataFields.point_positions],
       normals=rotated_normals,
       motions=rotated_motions,
       is_training=is_training)
  if standard_fields.InputDataFields.point_normals in mesh_inputs:
    mesh_inputs[
        standard_fields.InputDataFields.point_normals] = flipped_normals
  if standard_fields.InputDataFields.object_flow_points in mesh_inputs:
    mesh_inputs[
        standard_fields.InputDataFields.object_flow_points] = flipped_motions

  # Normalize RGB to [-1.0, 1.0].
  if standard_fields.InputDataFields.point_colors in mesh_inputs:
    mesh_inputs[standard_fields.InputDataFields.point_colors] = tf.cast(
        mesh_inputs[standard_fields.InputDataFields.point_colors],
        dtype=tf.float32)
    mesh_inputs[
        standard_fields.InputDataFields.point_colors] *= (2.0 / 255.0)
    mesh_inputs[standard_fields.InputDataFields.point_colors] -= 1.0

  # Add original points to mesh inputs.
  mesh_inputs[standard_fields.InputDataFields
              .point_positions_original] = original_points
  if standard_fields.InputDataFields.point_colors in mesh_inputs:
    mesh_inputs[standard_fields.InputDataFields
                .point_colors_original] = original_colors
  if standard_fields.InputDataFields.point_normals in mesh_inputs:
    mesh_inputs[standard_fields.InputDataFields
                .point_normals_original] = original_normals

  # Pad or clip the point tensors.
  pad_or_clip(
      mesh_inputs=mesh_inputs,
      view_indices_2d_inputs=view_indices_2d_inputs,
      pad_or_clip_size=points_pad_or_clip_size)
  if num_frame_to_load > 1:
    # Note: num_valid_points is the sum of 'num_points_per_frame' for now.
    # num_points_per_frame is each frame's valid num of points.
    # TODO(huangrui): if random sampling is called earlier, the count here
    # is not guaranteed to be in order. need sorting.
    if num_points_to_randomly_sample is not None:
      raise ValueError(
          'randomly sample is not compatible with padding multi frame '
          'point clouds yet!')
    _, _, mesh_inputs[
        standard_fields.InputDataFields
        .num_valid_points_per_frame] = tf.unique_with_counts(
            tf.reshape(
                mesh_inputs[
                    standard_fields.InputDataFields.point_frame_index],
                [-1]))
    if points_pad_or_clip_size is not None:
      padded_points = tf.where_v2(
          tf.greater(
              points_pad_or_clip_size,
              mesh_inputs[standard_fields.InputDataFields.num_valid_points]),
          points_pad_or_clip_size -
          mesh_inputs[standard_fields.InputDataFields.num_valid_points], 0)
      # Correct the potential unique count error from optionally padded 0s
      # point frame index.
      mesh_inputs[standard_fields.InputDataFields
                  .num_valid_points_per_frame] -= tf.pad(
                      tf.expand_dims(padded_points, 0), [[
                          0,
                          tf.shape(mesh_inputs[
                              standard_fields.InputDataFields
                              .num_valid_points_per_frame])[0] - 1
                      ]])

  # Putting back the dictionaries together.
  processed_inputs = mesh_inputs.copy()
  processed_inputs.update(non_tensor_inputs)
  for key in sorted(view_image_inputs):
    processed_inputs[('%s/features' % key)] = view_image_inputs[key]
  for key in sorted(view_indices_2d_inputs):
    processed_inputs[('%s/indices_2d' % key)] = view_indices_2d_inputs[key]

  # Create features that do not exist.
  if 'point_offsets' in point_feature_keys:
    preprocessor_utils.add_point_offsets(
        inputs=processed_inputs, voxel_grid_cell_size=voxel_grid_cell_size)
  if 'point_offset_bins' in point_feature_keys:
    preprocessor_utils.add_point_offset_bins(
        inputs=processed_inputs,
        voxel_grid_cell_size=voxel_grid_cell_size,
        num_bins_x=num_offset_bins_x,
        num_bins_y=num_offset_bins_y,
        num_bins_z=num_offset_bins_z)

  # Voxelize point features.
  preprocessor_utils.voxelize_point_features(
      inputs=processed_inputs,
      voxels_pad_or_clip_size=voxels_pad_or_clip_size,
      voxel_grid_cell_size=voxel_grid_cell_size,
      point_feature_keys=point_feature_keys,
      point_to_voxel_segment_func=point_to_voxel_segment_func,
      num_frame_to_load=num_frame_to_load)

  # Voxelize point / image correspondence indices.
  preprocessor_utils.voxelize_point_to_view_correspondences(
      inputs=processed_inputs,
      view_indices_2d_inputs=view_indices_2d_inputs,
      voxels_pad_or_clip_size=voxels_pad_or_clip_size,
      voxel_grid_cell_size=voxel_grid_cell_size)

  # Voxelizing the semantic labels.
  preprocessor_utils.voxelize_semantic_labels(
      inputs=processed_inputs,
      voxels_pad_or_clip_size=voxels_pad_or_clip_size,
      voxel_grid_cell_size=voxel_grid_cell_size)

  # Voxelizing the loss weights.
  preprocessor_utils.voxelize_property_tensor(
      inputs=processed_inputs,
      point_tensor_key=standard_fields.InputDataFields.point_loss_weights,
      corresponding_voxel_tensor_key=standard_fields.InputDataFields
      .voxel_loss_weights,
      voxels_pad_or_clip_size=voxels_pad_or_clip_size,
      voxel_grid_cell_size=voxel_grid_cell_size,
      segment_func=tf.math.unsorted_segment_max)

  # Voxelizing the object flow.
  if standard_fields.InputDataFields.object_flow_points in processed_inputs:
    preprocessor_utils.voxelize_property_tensor(
        inputs=processed_inputs,
        point_tensor_key=standard_fields.InputDataFields.object_flow_points,
        corresponding_voxel_tensor_key='object_flow_voxels_max',
        voxels_pad_or_clip_size=voxels_pad_or_clip_size,
        voxel_grid_cell_size=voxel_grid_cell_size,
        segment_func=tf.math.unsorted_segment_max)
    preprocessor_utils.voxelize_property_tensor(
        inputs=processed_inputs,
        point_tensor_key=standard_fields.InputDataFields.object_flow_points,
        corresponding_voxel_tensor_key='object_flow_voxels_min',
        voxels_pad_or_clip_size=voxels_pad_or_clip_size,
        voxel_grid_cell_size=voxel_grid_cell_size,
        segment_func=tf.math.unsorted_segment_min)
    processed_inputs[standard_fields.InputDataFields
                     .object_flow_voxels] = processed_inputs[
                         'object_flow_voxels_max'] + processed_inputs[
                             'object_flow_voxels_min']

  if num_frame_to_load > 1:
    mesh_inputs[
        standard_fields.InputDataFields.num_valid_points] = mesh_inputs[
            standard_fields.InputDataFields.num_valid_points_per_frame][0]

  # Filter processed_inputs by output_keys if it is not None.
  if output_keys is not None:
    processed_inputs = {
        k: v for k, v in six.iteritems(processed_inputs) if k in output_keys
    }
  return processed_inputs
def false_fn():
  if include_forward_rewards:
    return plain_rewards(states, actions, rewards, next_states, contexts)
  else:
    return tf.zeros_like(rewards), tf.ones_like(rewards)
def true_fn():
  if include_reset_rewards:
    return reset_reward_function(states, actions, rewards, next_states,
                                 [reset_states] + contexts[1:])
  else:
    return tf.zeros_like(rewards), tf.ones_like(rewards)
def prepare_lidar_images_and_correspondences(
    inputs,
    resized_image_height,
    resized_image_width,
    camera_names=('front', 'front_left', 'front_right', 'side_left',
                  'side_right'),
    lidar_names=('top', 'front', 'side_left', 'side_right', 'rear')):
  """Integrates and returns the lidars, cameras and their correspondences.

  Args:
    inputs: A dictionary containing the images and point / pixel
      correspondences.
    resized_image_height: Target height of the images.
    resized_image_width: Target width of the images.
    camera_names: List of cameras to include images from.
    lidar_names: List of lidars to include point clouds from.

  Returns:
    A tf.float32 tensor of size [num_points, 3] containing point positions.
    A tf.float32 tensor of size [num_points, 1] containing point
      intensities.
    A tf.float32 tensor of size [num_points, 1] containing point
      elongations.
    A tf.float32 tensor of size [num_points, 3] containing point normals.
    A tf.float32 tensor of size [num_images, resized_image_height,
      resized_image_width, 3].
    A tf.int32 tensor of size [num_images, num_points, 2].

  Raises:
    ValueError: If camera_names or lidar_names are empty lists.
  """
  if not camera_names:
    raise ValueError('camera_names should contain at least one name.')
  if not lidar_names:
    raise ValueError('lidar_names should contain at least one name.')

  (points_position, points_intensity, points_elongation, points_normal,
   points_in_image_frame_yx,
   points_in_image_frame_id) = _prepare_lidar_points(
       inputs=inputs, lidar_names=lidar_names)

  images = []
  points_in_image_frame = []
  for camera_name in camera_names:
    image_key = ('cameras/%s/image' % camera_name)
    image_height = tf.shape(inputs[image_key])[0]
    image_width = tf.shape(inputs[image_key])[1]
    height_ratio = tf.cast(
        resized_image_height, dtype=tf.float32) / tf.cast(
            image_height, dtype=tf.float32)
    width_ratio = tf.cast(
        resized_image_width, dtype=tf.float32) / tf.cast(
            image_width, dtype=tf.float32)
    if tf.executing_eagerly():
      resize_method = tf.image.ResizeMethod.NEAREST_NEIGHBOR
    else:
      resize_method = tf.image.ResizeMethod.BILINEAR
      if inputs[image_key].dtype in [
          tf.int8, tf.uint8, tf.int16, tf.uint16, tf.int32, tf.int64
      ]:
        resize_method = tf.image.ResizeMethod.NEAREST_NEIGHBOR
    images.append(
        tf.image.resize(
            images=inputs[image_key],
            size=[resized_image_height, resized_image_width],
            method=resize_method,
            antialias=True))
    camera_id = tf.cast(
        inputs[('cameras/%s/id' % camera_name)], dtype=tf.int32)
    valid_points = tf.equal(points_in_image_frame_id, camera_id)
    valid_points = tf.tile(valid_points, [1, 2])
    point_coords = tf.cast(
        tf.cast(points_in_image_frame_yx, dtype=tf.float32) *
        tf.stack([height_ratio, width_ratio]),
        dtype=tf.int32)
    points_in_image_frame_camera = tf.where(
        valid_points, point_coords,
        -tf.ones_like(valid_points, dtype=tf.int32))
    points_in_image_frame.append(points_in_image_frame_camera)
  num_images = len(images)
  images = tf.stack(images, axis=0)
  images.set_shape(
      [num_images, resized_image_height, resized_image_width, 3])
  points_in_image_frame = tf.stack(points_in_image_frame, axis=0)

  return {
      'points_position': points_position,
      'points_intensity': points_intensity,
      'points_elongation': points_elongation,
      'points_normal': points_normal,
      'view_images': {'rgb_view': images},
      'view_indices_2d': {'rgb_view': points_in_image_frame}
  }