def _remove_second_return_lidar_points(mesh_inputs, view_indices_2d_inputs):
  """Removes points that are not lidar first returns."""
  if standard_fields.InputDataFields.point_spin_coordinates not in mesh_inputs:
    raise ValueError('spin_coordinates not in mesh_inputs.')
  first_return_mask = tf.equal(
      tf.cast(
          mesh_inputs[
              standard_fields.InputDataFields.point_spin_coordinates][:, 2],
          dtype=tf.int32), 0)
  for key in sorted(mesh_inputs):
    mesh_inputs[key] = tf.boolean_mask(mesh_inputs[key], first_return_mask)
  for key in sorted(view_indices_2d_inputs):
    view_indices_2d_inputs[key] = tf.transpose(
        tf.boolean_mask(
            tf.transpose(view_indices_2d_inputs[key], [1, 0, 2]),
            first_return_mask), [1, 0, 2])
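# A minimal sketch (not part of the original module) of the
# transpose/boolean_mask/transpose pattern used above. View-index tensors
# have shape [num_views, num_points, 2], but masking must happen along the
# point axis, so the tensor is transposed to [num_points, num_views, 2] first.
import tensorflow as tf

view_indices = tf.constant([[[0, 0], [1, 1], [2, 2]]])  # [1 view, 3 points, 2]
keep = tf.constant([True, False, True])  # per-point mask
masked = tf.transpose(
    tf.boolean_mask(tf.transpose(view_indices, [1, 0, 2]), keep), [1, 0, 2])
# masked has shape [1, 2, 2]: the second point has been dropped.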
def _get_joint_loss_outputs(self, inputs):
  outputs = []
  for id_of_model, model in self.ids_to_models.items():
    outputs.append(
        model(
            self._get_model_inputs(id_of_model, inputs),
            apply_projection_layer=False))
  outputs = tf.stack(outputs)
  outputs = tf.transpose(outputs, perm=[1, 0, 2])
  outputs = self.dropout_layer(outputs)
  outputs = self.transformer_layer(outputs)
  outputs = tf.transpose(outputs, perm=[1, 0, 2])
  outputs = tf.unstack(outputs)
  outputs = self._project_with_submodels(outputs)
  outputs = tf.reduce_sum(outputs, axis=0)
  return outputs
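# A shape-flow sketch (illustrative only) of the stack/transpose dance above.
# Each sub-model emits [batch, dim]; stacking gives [num_models, batch, dim],
# and the transpose makes the model axis the "sequence" axis that the dropout
# and transformer layers operate over.
import tensorflow as tf

num_models, batch, dim = 3, 4, 8
per_model = [tf.random.normal([batch, dim]) for _ in range(num_models)]
x = tf.stack(per_model)              # [num_models, batch, dim]
x = tf.transpose(x, perm=[1, 0, 2])  # [batch, num_models, dim]
# ... dropout and transformer act on [batch, sequence=num_models, dim] ...
x = tf.transpose(x, perm=[1, 0, 2])  # back to [num_models, batch, dim]
# ... per-model projection happens here in the real code ...
summed = tf.reduce_sum(x, axis=0)    # [batch, dim]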
def update_state(self, inputs, outputs):
  """Function that updates the metric state at each example.

  Args:
    inputs: A dictionary containing input tensors.
    outputs: A dictionary containing output tensors.

  Returns:
    Update op.
  """
  detections_score = tf.reshape(
      outputs[standard_fields.DetectionResultFields.objects_score], [-1])
  detections_class = tf.reshape(
      outputs[standard_fields.DetectionResultFields.objects_class], [-1])
  num_detections = tf.shape(detections_score)[0]
  detections_instance_mask = tf.reshape(
      outputs[
          standard_fields.DetectionResultFields.instance_segments_voxel_mask],
      [num_detections, -1])
  gt_class = tf.reshape(
      inputs[standard_fields.InputDataFields.objects_class], [-1])
  num_gt = tf.shape(gt_class)[0]
  gt_voxel_instance_ids = tf.reshape(
      inputs[standard_fields.InputDataFields.object_instance_id_voxels], [-1])
  gt_instance_masks = tf.transpose(
      tf.one_hot(gt_voxel_instance_ids - 1, depth=num_gt, dtype=tf.float32))
  for c in self.class_range:
    gt_mask_c = tf.equal(gt_class, c)
    num_gt_c = tf.math.reduce_sum(tf.cast(gt_mask_c, dtype=tf.int32))
    gt_instance_masks_c = tf.boolean_mask(gt_instance_masks, gt_mask_c)
    detections_mask_c = tf.equal(detections_class, c)
    num_detections_c = tf.math.reduce_sum(
        tf.cast(detections_mask_c, dtype=tf.int32))
    if num_detections_c == 0:
      continue
    det_scores_c = tf.boolean_mask(detections_score, detections_mask_c)
    det_instance_mask_c = tf.boolean_mask(detections_instance_mask,
                                          detections_mask_c)
    det_scores_c, sorted_indices = tf.math.top_k(
        det_scores_c, k=num_detections_c)
    det_instance_mask_c = tf.gather(det_instance_mask_c, sorted_indices)
    tp_c = tf.zeros([num_detections_c], dtype=tf.int32)
    if num_gt_c > 0:
      ious_c = instance_segmentation_utils.points_mask_iou(
          masks1=gt_instance_masks_c, masks2=det_instance_mask_c)
      max_overlap_gt_ids = tf.cast(
          tf.math.argmax(ious_c, axis=0), dtype=tf.int32)
      is_gt_box_detected = tf.zeros([num_gt_c], dtype=tf.int32)
      for i in tf.range(num_detections_c):
        gt_id = max_overlap_gt_ids[i]
        if (ious_c[gt_id, i] > self.iou_threshold and
            is_gt_box_detected[gt_id] == 0):
          tp_c = tf.maximum(
              tf.one_hot(i, num_detections_c, dtype=tf.int32), tp_c)
          is_gt_box_detected = tf.maximum(
              tf.one_hot(gt_id, num_gt_c, dtype=tf.int32), is_gt_box_detected)
    self.tp[c] = tf.concat([self.tp[c], tp_c], axis=0)
    self.scores[c] = tf.concat([self.scores[c], det_scores_c], axis=0)
    self.num_gt[c] += num_gt_c
  return tf.no_op()
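# A minimal sketch of a pairwise mask IoU, shown only to clarify what
# `instance_segmentation_utils.points_mask_iou` is assumed to compute for the
# greedy matching above; the real helper may differ in details.
import tensorflow as tf

def pairwise_mask_iou(masks1, masks2):
  """masks1: [m, num_voxels], masks2: [n, num_voxels], binary float masks."""
  intersection = tf.matmul(masks1, masks2, transpose_b=True)  # [m, n]
  area1 = tf.reduce_sum(masks1, axis=1, keepdims=True)        # [m, 1]
  area2 = tf.reduce_sum(masks2, axis=1, keepdims=True)        # [n, 1]
  union = area1 + tf.transpose(area2) - intersection
  return intersection / tf.maximum(union, 1e-8)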
def _build_bisimulation_target(self):
  """Build the bisimulation target."""
  batch_size = tf.shape(self.rewards_ph)[0]
  r1 = tf.tile([self.rewards_ph], [batch_size, 1])
  r2 = tf.transpose(r1)
  reward_differences = tf.abs(r1 - r2)
  reward_differences = tf.reshape(reward_differences, (batch_size**2, 1))
  next_state_distances = self.bisim_horizon_ph * self.s2_target_distances
  return reward_differences + self.gamma * next_state_distances
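# A small numeric sketch (illustrative only) of the tile/transpose trick
# above, which builds all pairwise reward differences |r_i - r_j| for the
# target d(s_i, s_j) ~= |r_i - r_j| + gamma * d(s_i', s_j').
import tensorflow as tf

rewards = tf.constant([0.0, 1.0, 3.0])
r1 = tf.tile([rewards], [3, 1])  # row i repeats the reward vector
r2 = tf.transpose(r1)            # entry (i, j) holds rewards[i]
print(tf.abs(r1 - r2).numpy())
# [[0. 1. 3.]
#  [1. 0. 2.]
#  [3. 2. 0.]]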
def proto_maml_fc_layer_init_fn(labels, embeddings, weights, biases,
                                prototype_multiplier):
  """Returns a list of operations for reparameterized ProtoNet initialization."""
  # This is robust to classes missing from the training set, but assumes that
  # the last class is present.
  num_ways = tf.cast(
      tf.math.reduce_max(input_tensor=tf.unique(labels)[0]) + 1, tf.int32)

  # When there are no examples for a given class, we default its prototype to
  # zeros, per the implementation of `tf.math.unsorted_segment_mean`.
  prototypes = tf.math.unsorted_segment_mean(embeddings, labels, num_ways)

  # Scale the prototypes, which acts as a regularizer on the weights and
  # biases.
  prototypes *= prototype_multiplier

  # logit = -<squared Euclidean distance to prototype>
  #       = -(x - p)^T.(x - p)
  #       = 2 x^T.p - p^T.p - x^T.x
  #       = x^T.w + b
  # where w = 2p and b = -p^T.p; the -x^T.x term is dropped because it is
  # constant across classes and therefore does not affect the softmax.
  output_weights = tf.transpose(a=2 * prototypes)
  output_biases = -tf.reduce_sum(input_tensor=prototypes * prototypes, axis=1)

  # We zero-pad to align with the original weights and biases.
  output_weights = tf.pad(
      tensor=output_weights,
      paddings=[[0, 0],
                [0, tf.shape(input=weights)[1] -
                 tf.shape(input=output_weights)[1]]],
      mode='CONSTANT',
      constant_values=0)
  output_biases = tf.pad(
      tensor=output_biases,
      paddings=[[0, tf.shape(input=biases)[0] -
                 tf.shape(input=output_biases)[0]]],
      mode='CONSTANT',
      constant_values=0)
  return [
      weights.assign(output_weights),
      biases.assign(output_biases),
  ]
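# A quick numeric check (illustrative only) that the linear form x.w + b
# matches -||x - p||^2 up to the per-example constant x.x, so both yield the
# same class ranking.
import tensorflow as tf

x = tf.constant([1.0, 2.0])
prototypes = tf.constant([[0.0, 1.0], [2.0, 2.0]])  # two classes
w = tf.transpose(2 * prototypes)                    # [dim, classes]
b = -tf.reduce_sum(prototypes * prototypes, axis=1)
linear = tf.linalg.matvec(w, x, transpose_a=True) + b
neg_sq_dist = -tf.reduce_sum((x - prototypes)**2, axis=1)
# The two differ by exactly x.x = 5 for every class:
print((linear - neg_sq_dist).numpy())  # [5. 5.]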
def compute_logits_for_episode(self, support_embeddings, query_embeddings,
                               data):
  """Compute CrossTransformer logits."""
  with tf.variable_scope('tformer_keys', reuse=tf.AUTO_REUSE):
    support_keys, key_params = functional_backbones.conv(
        support_embeddings, [1, 1],
        self.query_dim,
        1,
        weight_decay=self.tformer_weight_decay)
    query_queries, _ = functional_backbones.conv(
        query_embeddings, [1, 1],
        self.query_dim,
        1,
        params=key_params,
        weight_decay=self.tformer_weight_decay)

  with tf.variable_scope('tformer_values', reuse=tf.AUTO_REUSE):
    support_values, value_params = functional_backbones.conv(
        support_embeddings, [1, 1],
        self.val_dim,
        1,
        weight_decay=self.tformer_weight_decay)
    query_values, _ = functional_backbones.conv(
        query_embeddings, [1, 1],
        self.val_dim,
        1,
        params=value_params,
        weight_decay=self.tformer_weight_decay)

  onehot_support_labels = distribute_utils.aggregate(
      data.onehot_support_labels)
  support_keys = distribute_utils.aggregate(support_keys)
  support_values = distribute_utils.aggregate(support_values)
  labels = tf.argmax(onehot_support_labels, axis=1)
  if self.rematerialize:
    distances = self._get_dist_rematerialize(query_queries, query_values,
                                             support_keys, support_values,
                                             labels)
  else:
    distances = self._get_dist(query_queries, query_values, support_keys,
                               support_values, labels)
  self.test_logits = -tf.transpose(distances)
  return self.test_logits
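# `_get_dist` is not shown here; this is a minimal, single-class sketch of
# the CrossTransformer-style distance it is assumed to compute: query
# locations attend over support spatial locations to build query-aligned
# prototypes, and the distance is the squared error between query values and
# those prototypes. All shapes and names below are illustrative assumptions.
import tensorflow as tf

def crosstransformer_distance_sketch(query_queries, query_values,
                                     support_keys, support_values):
  """query_*: [q, h*w, dim]; support_*: [s*h*w, dim], for one class."""
  dk = tf.cast(tf.shape(support_keys)[-1], tf.float32)
  # Attention of every query spatial location over all support locations.
  attn = tf.nn.softmax(
      tf.einsum('qpd,sd->qps', query_queries, support_keys) / tf.sqrt(dk))
  aligned = tf.einsum('qps,se->qpe', attn, support_values)  # [q, h*w, val_dim]
  return tf.reduce_sum(tf.square(query_values - aligned), axis=[1, 2])  # [q]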
def proto_maml_fc_weights(self, prototypes, zero_pad_to_max_way=False):
  """Computes the Prototypical MAML fc layer's weights.

  Args:
    prototypes: Tensor of shape [num_classes, embedding_size].
    zero_pad_to_max_way: Whether to zero-pad to the max num way.

  Returns:
    fc_weights: Tensor of shape [embedding_size, num_classes] or
      [embedding_size, self.logit_dim] when zero_pad_to_max_way is True.
  """
  fc_weights = 2 * prototypes
  fc_weights = tf.transpose(fc_weights)
  if zero_pad_to_max_way:
    paddings = [[0, 0], [0, self.logit_dim - tf.shape(fc_weights)[1]]]
    fc_weights = tf.pad(fc_weights, paddings, 'CONSTANT', constant_values=0)
  return fc_weights
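# A toy example (illustrative only): three 2-d prototypes padded out to a
# max-way of 5 columns, so episodes with different numbers of classes can
# share one fc layer shape.
import tensorflow as tf

prototypes = tf.constant([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])  # [3, 2]
fc_weights = tf.transpose(2 * prototypes)                        # [2, 3]
logit_dim = 5  # stand-in for self.logit_dim
paddings = [[0, 0], [0, logit_dim - tf.shape(fc_weights)[1]]]
fc_weights = tf.pad(fc_weights, paddings, 'CONSTANT', constant_values=0)
print(fc_weights.shape)  # (2, 5)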
def _calc_cost_for_action_sequence(self, time_step: ActionTimeStep, state,
                                   ac_seqs):
  """Calculates the cost of each candidate action sequence.

  Args:
    time_step (ActionTimeStep): input data for next step prediction.
    state (MbrlState): input state for next step prediction.
    ac_seqs: action sequence (tf.Tensor) of shape [batch_size,
      population_size, solution_dim], where
      solution_dim = planning_horizon * num_actions.

  Returns:
    cost (tf.Tensor) with shape [batch_size, population_size].
  """
  obs = time_step.observation
  batch_size = obs.shape[0]
  init_costs = tf.zeros([batch_size, self._population_size])
  ac_seqs = tf.reshape(
      ac_seqs,
      [batch_size, self._population_size, self._planning_horizon, -1])
  ac_seqs = tf.reshape(
      tf.transpose(ac_seqs, [2, 0, 1, 3]),
      [self._planning_horizon, -1, self._num_actions])
  state = state._replace(dynamics=state.dynamics._replace(feature=obs))
  init_obs = self._expand_to_population(obs)
  state = tf.nest.map_structure(self._expand_to_population, state)
  obs = init_obs
  cost = 0
  for i in range(ac_seqs.shape[0]):
    action = ac_seqs[i]
    time_step = time_step._replace(prev_action=action)
    time_step, state = self._dynamics_func(time_step, state)
    next_obs = time_step.observation
    # Note: currently using (next_obs, action); we might need to consider
    # (obs, action) instead, to be more compatible with the conventional
    # definition of a reward function.
    reward_step = self._reward_func(next_obs, action)
    cost = cost - reward_step
    obs = next_obs
  # Reshape cost back to [batch_size, population_size].
  cost = tf.reshape(cost, [batch_size, -1])
  return cost
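# A shape-trace sketch (illustrative only) of the two reshapes above, which
# turn [batch, population, horizon * num_actions] solutions into per-step
# action batches of shape [horizon, batch * population, num_actions].
import tensorflow as tf

batch, population, horizon, num_actions = 2, 3, 4, 5
ac_seqs = tf.zeros([batch, population, horizon * num_actions])
x = tf.reshape(ac_seqs, [batch, population, horizon, -1])
x = tf.reshape(tf.transpose(x, [2, 0, 1, 3]), [horizon, -1, num_actions])
print(x.shape)  # (4, 6, 5): one [batch*population, num_actions] slice per step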
def compute_motion_labels(scene,
                          frame0,
                          frame1,
                          frame_start_index,
                          points_key,
                          box_margin=0.1):
  """Compute motion label for each point.

  Args:
    scene: dict of tensors containing the scene.
    frame0: dict of tensors containing points and objects.
    frame1: dict of tensors containing points and objects.
    frame_start_index: starting frame index.
    points_key: A string corresponding to the tensor of point positions in
      inputs.
    box_margin: A margin value to enlarge boxes, so that surrounding points
      are included.

  Returns:
    A motion tensor of [N, 3] shape.
  """
  point_positions = frame0[points_key]
  frame0_object_names = frame0['objects/name']
  frame1_object_names = frame1['objects/name']
  bool_matrix = tf.math.equal(
      tf.expand_dims(frame0_object_names, axis=1),
      tf.expand_dims(frame1_object_names, axis=0))
  match_indices = tf.where(bool_matrix)

  # Object box level.
  box_dimension = tf.gather(
      frame0['objects/shape/dimension'], match_indices[:, 0], axis=0)
  boxes_length = box_dimension[:, 0:1]
  boxes_width = box_dimension[:, 1:2]
  boxes_height = box_dimension[:, 2:3]
  boxes_rotation_matrix = tf.gather(
      frame0['objects/pose/R'], match_indices[:, 0], axis=0)
  boxes_center = tf.gather(
      frame0['objects/pose/t'], match_indices[:, 0], axis=0)
  frame1_box_rotation_matrix = tf.gather(
      frame1['objects/pose/R'], match_indices[:, 1], axis=0)
  frame1_box_center = tf.gather(
      frame1['objects/pose/t'], match_indices[:, 1], axis=0)

  # Frame level.
  frame0_rotation = scene['frames/pose/R'][frame_start_index]
  frame1_rotation = scene['frames/pose/R'][frame_start_index + 1]
  frame0_translation = scene['frames/pose/t'][frame_start_index]
  frame1_translation = scene['frames/pose/t'][frame_start_index + 1]

  frame1_box_center_global = tf.tensordot(
      frame1_box_center, frame1_rotation, axes=(1, 1)) + frame1_translation
  frame1_box_center_in_frame0 = tf.tensordot(
      frame1_box_center_global - frame0_translation,
      frame0_rotation,
      axes=(1, 0))

  # Only find indices on boxes that are matched between the two frames.
  points_box_index = box_utils.map_points_to_boxes(
      points=point_positions,
      boxes_length=boxes_length,
      boxes_height=boxes_height,
      boxes_width=boxes_width,
      boxes_rotation_matrix=boxes_rotation_matrix,
      boxes_center=boxes_center,
      box_margin=box_margin)

  # TODO(huangrui): disappeared object boxes get 0 motion.
  # Probably consider setting those to nan or ignore_label.
  # 1. Gather points in surviving matched boxes only, and replicate
  #    rotation/t to the same length;
  # 2. get points in the box frame, apply the new rotation/t per point;
  # 3. new location minus old location -> motion vector;
  # 4. scatter it into a larger motion_vector with 0 for points outside of
  #    matched boxes.
  # We need to limit boxes to the matched boxes; otherwise points_box_index
  # will contain useless boxes.

  # Index, in the full point array, of points that are inside a box.
  points_inside_box_index = tf.where(points_box_index + 1)[:, 0]
  box_index = tf.gather(points_box_index, points_inside_box_index)
  points_inside_box = tf.gather(point_positions, points_inside_box_index)
  box_rotation_per_point = tf.gather(boxes_rotation_matrix, box_index)
  box_center_per_point = tf.gather(boxes_center, box_index)

  # Tensors of shape [N, 3, 3] and [N, 3]. Note that the 'ikj' subscript
  # below applies the transposed (inverse) rotation, transforming points from
  # the world frame into the box frame.
  points_in_box_frame = tf.einsum('ikj,ik->ij', box_rotation_per_point,
                                  points_inside_box - box_center_per_point)

  # Transform the box rotation from the frame1 coordinate system to the
  # frame0 coordinate system; the transpose is implemented by changing the
  # summation axis.
  frame1_box_rotation_matrix_global = tf.transpose(
      tf.tensordot(frame1_rotation, frame1_box_rotation_matrix, axes=(1, 1)),
      perm=(1, 0, 2))
  frame1_box_rotation_matrix_in_frame0 = tf.transpose(
      tf.tensordot(
          frame0_rotation, frame1_box_rotation_matrix_global, axes=(0, 1)),
      perm=(1, 0, 2))

  # This is the position each point reaches by following its frame1 box's
  # motion.
  frame1_box_rotation_in_frame0_per_point = tf.gather(
      frame1_box_rotation_matrix_in_frame0, box_index)
  frame1_box_center_in_frame0_per_point = tf.gather(
      frame1_box_center_in_frame0, box_index)

  points_in_box_frame1 = tf.einsum(
      'ijk,ik->ij', frame1_box_rotation_in_frame0_per_point,
      points_in_box_frame) + frame1_box_center_in_frame0_per_point
  motion_vector = points_in_box_frame1 - points_inside_box

  scattered_vector = tf.scatter_nd(
      indices=tf.expand_dims(points_inside_box_index, axis=1),
      updates=motion_vector,
      shape=tf.shape(point_positions, out_type=tf.dtypes.int64))
  return scattered_vector
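# A tiny numeric check (illustrative only) that einsum with 'ikj' applies the
# transposed rotation, i.e. maps a world-frame offset into the box frame.
import numpy as np
import tensorflow as tf

theta = np.pi / 2
rot = tf.constant([[[np.cos(theta), -np.sin(theta), 0.0],
                    [np.sin(theta), np.cos(theta), 0.0],
                    [0.0, 0.0, 1.0]]], dtype=tf.float32)  # [1, 3, 3], box->world
offset = tf.constant([[1.0, 0.0, 0.0]])                   # world-frame, [1, 3]
in_box = tf.einsum('ikj,ik->ij', rot, offset)             # applies rot^T
print(in_box.numpy())  # ~[[0., -1., 0.]]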
def randomly_crop_points(mesh_inputs,
                         view_indices_2d_inputs,
                         x_random_crop_size,
                         y_random_crop_size,
                         epsilon=1e-5):
  """Randomly crops points.

  Args:
    mesh_inputs: A dictionary containing input mesh (point) tensors.
    view_indices_2d_inputs: A dictionary containing input point to view
      correspondence tensors.
    x_random_crop_size: Size of the random crop in the x dimension. If None,
      random cropping will not take place in the x dimension.
    y_random_crop_size: Size of the random crop in the y dimension. If None,
      random cropping will not take place in the y dimension.
    epsilon: A very small value added as a margin to the thresholds.
  """
  if x_random_crop_size is None and y_random_crop_size is None:
    return

  points = mesh_inputs[standard_fields.InputDataFields.point_positions]
  num_points = tf.shape(points)[0]
  # Pick a random point.
  if x_random_crop_size is not None or y_random_crop_size is not None:
    random_index = tf.random.uniform([],
                                     minval=0,
                                     maxval=num_points,
                                     dtype=tf.int32)
    center_x = points[random_index, 0]
    center_y = points[random_index, 1]

  points_x = points[:, 0]
  points_y = points[:, 1]
  min_x = tf.reduce_min(points_x) - epsilon
  max_x = tf.reduce_max(points_x) + epsilon
  min_y = tf.reduce_min(points_y) - epsilon
  max_y = tf.reduce_max(points_y) + epsilon
  if x_random_crop_size is not None:
    min_x = center_x - x_random_crop_size / 2.0 - epsilon
    max_x = center_x + x_random_crop_size / 2.0 + epsilon
  if y_random_crop_size is not None:
    min_y = center_y - y_random_crop_size / 2.0 - epsilon
    max_y = center_y + y_random_crop_size / 2.0 + epsilon

  x_mask = tf.logical_and(
      tf.greater(points_x, min_x), tf.less(points_x, max_x))
  y_mask = tf.logical_and(
      tf.greater(points_y, min_y), tf.less(points_y, max_y))
  points_mask = tf.logical_and(x_mask, y_mask)

  for key in sorted(mesh_inputs):
    mesh_inputs[key] = tf.boolean_mask(mesh_inputs[key], points_mask)
  for key in sorted(view_indices_2d_inputs):
    view_indices_2d_inputs[key] = tf.transpose(
        tf.boolean_mask(
            tf.transpose(view_indices_2d_inputs[key], [1, 0, 2]),
            points_mask), [1, 0, 2])
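# A minimal usage sketch (illustrative only); the field name follows the code
# above, the view key is hypothetical, and both dicts are mutated in place.
import tensorflow as tf

mesh_inputs = {
    standard_fields.InputDataFields.point_positions:
        tf.random.uniform([100, 3], minval=-10.0, maxval=10.0),
}
view_indices_2d_inputs = {
    'rgb_view': tf.zeros([1, 100, 2], dtype=tf.int32),  # [views, points, 2]
}
randomly_crop_points(
    mesh_inputs, view_indices_2d_inputs,
    x_random_crop_size=5.0, y_random_crop_size=5.0)
# Both dicts now contain only the points inside a random 5 x 5 xy window.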
def _build_train_op(self, optimizer):
  """Build the TensorFlow graph used to learn the bisimulation metric.

  Args:
    optimizer: a tf.train optimizer.

  Returns:
    A TensorFlow op to minimize the bisimulation loss.
  """
  self.online_network = tf.make_template('Online', self._network_template)
  self.target_network = tf.make_template('Target', self._network_template)
  self.s1_ph = tf.placeholder(tf.float64, (self.batch_size, 2), name='s1_ph')
  self.s2_ph = tf.placeholder(tf.float64, (self.batch_size, 2), name='s2_ph')
  self.s1_online_distances = self.online_network(
      self._concat_states(self.s1_ph))
  self.s1_target_distances = self.target_network(
      self._concat_states(self.s1_ph))
  self.s2_target_distances = self.target_network(
      self._concat_states(self.s2_ph))
  self.action_ph = tf.placeholder(tf.int32, (self.batch_size,))
  self.rewards_ph = tf.placeholder(tf.float64, (self.batch_size,))
  # We use an expanding horizon for computing the distances.
  self.bisim_horizon_ph = tf.placeholder(tf.float64, ())
  # bisimulation_target_1 = rew_diff + gamma * next_distance.
  bisimulation_target_1 = tf.stop_gradient(self._build_bisimulation_target())
  # bisimulation_target_2 = curr_distance.
  bisimulation_target_2 = tf.stop_gradient(self.s1_target_distances)
  # We slowly taper in the maximum according to the bisim horizon.
  bisimulation_target = tf.maximum(
      bisimulation_target_1, bisimulation_target_2 * self.bisim_horizon_ph)
  # We zero out diagonal entries, since those estimate the distance between
  # a state and itself, which we know to be 0.
  diagonal_mask = 1.0 - tf.diag(tf.ones(self.batch_size, dtype=tf.float64))
  diagonal_mask = tf.reshape(diagonal_mask, (self.batch_size**2, 1))
  bisimulation_target *= diagonal_mask
  bisimulation_estimate = self.s1_online_distances
  # We start with a mask that includes everything.
  loss_mask = tf.ones(tf.shape(bisimulation_estimate))
  # We have to enforce that the states being compared were acted on with the
  # same action.
  indicators = self.action_ph
  indicators = tf.cast(indicators, tf.float64)
  # indicators will initially have shape [batch_size]; we first tile it:
  square_ids = tf.tile([indicators], [self.batch_size, 1])
  # We subtract square_ids from its transpose:
  square_ids = square_ids - tf.transpose(square_ids)
  # At this point all zero entries are the ones with equal IDs. Now we
  # convert the zeros in this matrix to 1s, and everything else to 0:
  loss_mask = 1 - tf.abs(tf.sign(square_ids))
  # Now reshape to match the shapes of the estimate and target.
  loss_mask = tf.reshape(loss_mask, (self.batch_size**2, 1))
  larger_targets = bisimulation_target - bisimulation_estimate
  larger_targets_count = tf.reduce_sum(
      tf.cast(larger_targets > 0., tf.float64))
  tf.summary.scalar('Learning/LargerTargets', larger_targets_count)
  tf.summary.scalar('Learning/NumUpdates', tf.count_nonzero(loss_mask))
  tf.summary.scalar('Learning/BisimHorizon', self.bisim_horizon_ph)
  bisimulation_loss = tf.losses.mean_squared_error(
      bisimulation_target, bisimulation_estimate, weights=loss_mask)
  tf.summary.scalar('Learning/loss', bisimulation_loss)
  # Plot the average distance between sampled representations.
  average_distance = tf.reduce_mean(bisimulation_estimate)
  tf.summary.scalar('Approx/AverageDistance', average_distance)
  return optimizer.minimize(bisimulation_loss)
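# A quick numeric check (illustrative only, written in TF2/eager style) of
# the same-action mask trick above: entry (i, j) is 1 exactly when actions i
# and j are equal, so the loss only compares states reached via the same
# action.
import tensorflow as tf

actions = tf.constant([0., 1., 0.])
square_ids = tf.tile([actions], [3, 1])
square_ids = square_ids - tf.transpose(square_ids)
mask = 1 - tf.abs(tf.sign(square_ids))
print(mask.numpy())
# [[1. 0. 1.]
#  [0. 1. 0.]
#  [1. 0. 1.]]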