def compute_pointcloud_weights_based_on_voxel_density(points, grid_cell_size):
  """Computes pointcloud weights based on voxel density.

  Args:
    points: A tf.float32 tensor of size [num_points, 3].
    grid_cell_size: The size of the grid cells in x, y, z dimensions in the
      voxel grid. It should be either a tf.float32 tensor, a numpy array or a
      list of size [3].

  Returns:
    A tf.float32 tensor of size [num_points, 1] containing weights that are
      inversely proportional to the density of the points in voxels.
  """
  num_points = tf.shape(points)[0]
  features = tf.ones([num_points, 1], dtype=tf.float32)
  voxel_features, _, segment_ids, _ = (
      pointcloud_to_sparse_voxel_grid_unbatched(
          points=points,
          features=features,
          grid_cell_size=grid_cell_size,
          segment_func=tf.math.unsorted_segment_sum))
  num_voxels = tf.shape(voxel_features)[0]
  point_features = sparse_voxel_grid_to_pointcloud(
      voxel_features=tf.expand_dims(voxel_features, axis=0),
      segment_ids=tf.expand_dims(segment_ids, axis=0),
      num_valid_voxels=tf.expand_dims(num_voxels, axis=0),
      num_valid_points=tf.expand_dims(num_points, axis=0))
  inverse_point_densities = 1.0 / tf.squeeze(point_features, axis=0)
  total_inverse_density = tf.reduce_sum(inverse_point_densities)
  return (inverse_point_densities * tf.cast(num_points, dtype=tf.float32) /
          total_inverse_density)

def compute_class_distances(self, support_embeddings, onehot_support_labels,
                            query_embeddings):
  """Return the relation score of each query example to each prototype."""
  # `query_embeddings` is [num_examples, 21, 21, num_features].
  out_shape = query_embeddings.shape.as_list()[1:]
  num_features = out_shape[-1]
  num_query_examples = tf.shape(input=query_embeddings)[0]

  # [num_classes, 19, 19, num_features].
  prototypes = compute_prototypes(support_embeddings, onehot_support_labels)

  # [num_query_examples, num_classes, 19, 19, num_features].
  prototype_extended = tf.tile(
      tf.expand_dims(prototypes, 0),
      [num_query_examples] + [1] * (1 + len(out_shape)))

  # [num_query_examples, num_classes, 19, 19, num_features].
  way = onehot_support_labels.shape.as_list()[-1]
  query_extended = tf.tile(
      tf.expand_dims(query_embeddings, 1), [1, way] + [1] * len(out_shape))
  relation_pairs = tf.concat((prototype_extended, query_extended),
                             len(out_shape) + 1)

  # relation_pairs.shape.as_list()[-3:] == [-1] + out_shape + [num_features*2]
  relation_pairs = tf.reshape(relation_pairs,
                              [-1] + out_shape[:-1] + [num_features * 2])

  return tf.reshape(self.relation_module_fn(relation_pairs), [-1, way])

def compute_logits(self, support_embeddings, query_embeddings,
                   onehot_support_labels):
  """Computes the relation score of each query example to each prototype."""
  # [n_test, 21, 21, n_features].
  query_embed_shape = query_embeddings.shape.as_list()
  n_feature = query_embed_shape[3]
  out_shape = query_embed_shape[1:3]
  n_test = tf.shape(query_embeddings)[0]

  # [n_test, num_classes, 21, 21, n_feature].
  # It is okay for one of the elements in the list to be a tensor.
  prototypes = compute_prototypes(support_embeddings, onehot_support_labels)
  prototype_extended = tf.tile(
      tf.expand_dims(prototypes, 0), [n_test, 1, 1, 1, 1])

  # [num_classes, n_test, 21, 21, n_feature].
  query_f_extended = tf.tile(
      tf.expand_dims(query_embeddings, 1),
      [1, tf.shape(onehot_support_labels)[-1], 1, 1, 1])
  relation_pairs = tf.concat((prototype_extended, query_f_extended), 4)

  # relation_pairs.shape.as_list()[-3:] == [-1] + out_shape + [n_feature*2]
  relation_pairs = tf.reshape(relation_pairs,
                              [-1] + out_shape + [n_feature * 2])
  relationnet_dict = functional_backbones.relation_module(
      relation_pairs, 'relationnet')
  way = tf.shape(onehot_support_labels)[-1]
  relations = tf.reshape(relationnet_dict['output'], [-1, way])
  return relations

def _compute_prototype_loss(self,
                            embeddings,
                            labels,
                            labels_one_hot,
                            prototypes=None):
  """Computes the loss and accuracy on an episode."""
  labels_dense = labels
  if prototypes is None:
    # Compute protos.
    labels = tf.cast(labels_one_hot, tf.float32)
    # [num examples, 1, embedding size].
    embeddings_ = tf.expand_dims(embeddings, 1)
    # [num examples, num classes, 1].
    labels = tf.expand_dims(labels, 2)
    # Sums each class' embeddings. [num classes, embedding size].
    class_sums = tf.reduce_sum(labels * embeddings_, 0)
    # The prototype of each class is the averaged embedding of its examples.
    class_num_images = tf.reduce_sum(labels, 0)  # [way].
    prototypes = class_sums / class_num_images  # [way, embedding size].

  # Compute logits.
  embeddings = tf.nn.l2_normalize(embeddings, 1, epsilon=1e-3)
  prototypes = tf.nn.l2_normalize(prototypes, 1, epsilon=1e-3)
  logits = tf.matmul(embeddings, prototypes, transpose_b=True)

  loss = self.compute_loss(labels_one_hot, logits)
  acc = tf.reduce_mean(self.compute_accuracy(labels_dense, logits))
  return loss, acc, prototypes, logits

def _compute_prototypes(embeddings, labels):
  """Computes class prototypes over the last dimension of embeddings.

  Args:
    embeddings: Tensor of examples of shape [num_examples, embedding_size].
    labels: Tensor of one-hot encoded labels of shape [num_examples,
      num_classes].

  Returns:
    prototypes: Tensor of class prototypes of shape [num_classes,
      embedding_size].
  """
  labels = tf.cast(labels, tf.float32)

  # [num examples, 1, embedding size].
  embeddings = tf.expand_dims(embeddings, 1)

  # [num examples, num classes, 1].
  labels = tf.expand_dims(labels, 2)

  # Sums each class' embeddings. [num classes, embedding size].
  class_sums = tf.reduce_sum(labels * embeddings, 0)

  # The prototype of each class is the averaged embedding of its examples.
  class_num_images = tf.reduce_sum(labels, 0)  # [way].
  prototypes = class_sums / class_num_images

  return prototypes

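
# --- Usage sketch (added for illustration, not part of the original source).
# A minimal eager-mode example of `_compute_prototypes`; the toy embeddings,
# labels, and shapes below are assumptions chosen to make the per-class
# averaging visible.
import tensorflow as tf

example_embeddings = tf.constant(
    [[1.0, 0.0], [3.0, 0.0], [0.0, 2.0]])  # [num_examples=3, embedding_size=2]
example_labels = tf.constant(
    [[1.0, 0.0], [1.0, 0.0], [0.0, 1.0]])  # one-hot, num_classes=2
example_prototypes = _compute_prototypes(example_embeddings, example_labels)
# example_prototypes == [[2.0, 0.0],   # mean of the two class-0 embeddings
#                        [0.0, 2.0]]   # mean of the single class-1 embedding
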
def compute_logits(self, support_embeddings, query_embeddings,
                   onehot_support_labels, cosine_distance=False):
  """Computes the negative distances of each query point to each prototype."""
  prototypes = compute_prototypes(support_embeddings, onehot_support_labels)

  if cosine_distance:
    query_embeddings = tf.nn.l2_normalize(query_embeddings, 1, epsilon=1e-3)
    prototypes = tf.nn.l2_normalize(prototypes, 1, epsilon=1e-3)
    logits = tf.matmul(query_embeddings, prototypes, transpose_b=True)
  else:
    # [num test images, 1, embedding size].
    query_embeddings = tf.expand_dims(query_embeddings, 1)
    # [1, num_classes, embedding_size].
    prototypes = tf.expand_dims(prototypes, 0)
    # Squared euclidean distance between each test embedding / prototype pair.
    distances = tf.reduce_sum(tf.square(query_embeddings - prototypes), 2)
    logits = -distances
  return logits

def embedding_regularization_loss(inputs,
                                  outputs,
                                  lambda_coef=0.0001,
                                  regularization_type='unit_length',
                                  is_intermediate=False):
  """Embedding regularization loss.

  Args:
    inputs: A dictionary that contains
      num_valid_voxels - A tf.int32 tensor of size [batch_size].
      instance_ids - A tf.int32 tensor of size [batch_size, n].
    outputs: A dictionary that contains
      embeddings - A tf.float32 tensor of size [batch_size, n, f].
    lambda_coef: Regularization loss coefficient.
    regularization_type: Regularization loss type. Supported values are 'msq'
      and 'unit_length'. 'msq' stands for 'mean square' which penalizes the
      embedding vectors if they have a length far from zero. 'unit_length'
      penalizes the embedding vectors if they have a length far from one.
    is_intermediate: True if applied to intermediate predictions; otherwise,
      False.

  Returns:
    A tf.float32 scalar loss tensor.
  """
  instance_ids_key = standard_fields.InputDataFields.object_instance_id_voxels
  num_voxels_key = standard_fields.InputDataFields.num_valid_voxels
  if is_intermediate:
    embedding_key = (
        standard_fields.DetectionResultFields
        .intermediate_instance_embedding_voxels)
  else:
    embedding_key = (
        standard_fields.DetectionResultFields.instance_embedding_voxels)
  if instance_ids_key not in inputs:
    raise ValueError('instance_ids is missing in inputs.')
  if embedding_key not in outputs:
    raise ValueError('embedding is missing in outputs.')
  if num_voxels_key not in inputs:
    raise ValueError('num_voxels is missing in inputs.')
  batch_size = inputs[num_voxels_key].get_shape().as_list()[0]
  if batch_size is None:
    raise ValueError('batch_size is not defined at graph construction time.')
  num_valid_voxels = inputs[num_voxels_key]
  num_voxels = tf.shape(inputs[instance_ids_key])[1]
  valid_mask = tf.less(
      tf.tile(tf.expand_dims(tf.range(num_voxels), axis=0), [batch_size, 1]),
      tf.expand_dims(num_valid_voxels, axis=1))
  valid_mask = tf.reshape(valid_mask, [-1])
  embedding_dims = outputs[embedding_key].get_shape().as_list()[-1]
  if embedding_dims is None:
    raise ValueError(
        'Embedding dimension is unknown at graph construction time.')
  embedding = tf.reshape(outputs[embedding_key], [-1, embedding_dims])
  embedding = tf.boolean_mask(embedding, valid_mask)
  return metric_learning_losses.regularization_loss(
      embedding=embedding,
      lambda_coef=lambda_coef,
      regularization_type=regularization_type)

def train_step(self,
               time_step: ActionTimeStep,
               state,
               calc_intrinsic_reward=True):
  """Train step.

  Args:
    time_step (ActionTimeStep): input time_step data.
    state (tuple): state for MISC (previous observation, previous previous
      action).
    calc_intrinsic_reward (bool): if False, only return the losses.

  Returns:
    AlgorithmStep:
      outputs: empty tuple ().
      state: tuple of observation and previous action.
      info (MISCInfo):
  """
  feature_state = time_step.observation
  prev_action = time_step.prev_action
  feature = tf.concat([feature_state, prev_action], axis=-1)
  prev_feature = tf.concat(state, axis=-1)
  feature_reshaped = tf.expand_dims(feature, axis=1)
  prev_feature_reshaped = tf.expand_dims(prev_feature, axis=1)
  feature_pair = tf.concat([prev_feature_reshaped, feature_reshaped], 1)
  feature_reshaped_tran = transpose2(feature_reshaped, 1, 0)

  def add_batch():
    self._buffer.add_batch(feature_reshaped_tran)

  if calc_intrinsic_reward:
    add_batch()

  if self._n_objects < 2:
    obs_tau_excludes_goal, obs_tau_achieved_goal = (
        self._split_observation_fn(feature_pair))
    loss = self._mine(obs_tau_excludes_goal, obs_tau_achieved_goal)
  elif self._n_objects == 2:
    (obs_tau_excludes_goal, obs_tau_achieved_goal_1,
     obs_tau_achieved_goal_2) = self._split_observation_fn(feature_pair)
    loss_1 = self._mine(obs_tau_excludes_goal, obs_tau_achieved_goal_1)
    loss_2 = self._mine(obs_tau_excludes_goal, obs_tau_achieved_goal_2)
    loss = loss_1 + loss_2

  intrinsic_reward = ()
  if calc_intrinsic_reward:
    # Scale/normalize the MISC intrinsic reward.
    if self._n_objects < 2:
      intrinsic_reward = tf.clip_by_value(self._mi_r_scale * loss, 0, 1)
    elif self._n_objects == 2:
      intrinsic_reward = tf.clip_by_value(
          self._mi_r_scale * loss_1, 0, 1) + 1 * tf.clip_by_value(
              self._mi_r_scale * loss_2, 0, 1)

  return AlgorithmStep(
      outputs=(),
      state=[feature_state, prev_action],
      info=MISCInfo(reward=intrinsic_reward))

def pointcloud_to_voxel_grid(points,
                             features,
                             grid_cell_size,
                             start_location,
                             end_location,
                             segment_func=tf.math.unsorted_segment_mean):
  """Converts a pointcloud into a voxel grid.

  Args:
    points: A tf.float32 tensor of size [N, 3].
    features: A tf.float32 tensor of size [N, F].
    grid_cell_size: A tf.float32 tensor of size [3].
    start_location: A tf.float32 tensor of size [3].
    end_location: A tf.float32 tensor of size [3].
    segment_func: A tensorflow function that operates on segments. Expects one
      of tf.math.unsorted_segment_{min/max/mean/prod/sum}. Defaults to
      tf.math.unsorted_segment_mean.

  Returns:
    voxel_features: A tf.float32 tensor of size
      [grid_x_len, grid_y_len, grid_z_len, F].
    segment_ids: A tf.int32 tensor of IDs for each point indicating which
      (flattened) voxel cell its data was mapped to.
    point_indices: A tf.int32 tensor of size [num_points, 3] containing the
      location of each point in the 3d voxel grid.
  """
  grid_cell_size = tf.convert_to_tensor(grid_cell_size, dtype=tf.float32)
  start_location = tf.convert_to_tensor(start_location, dtype=tf.float32)
  end_location = tf.convert_to_tensor(end_location, dtype=tf.float32)
  point_indices = tf.cast(
      (points - tf.expand_dims(start_location, axis=0)) /
      tf.expand_dims(grid_cell_size, axis=0),
      dtype=tf.int32)
  grid_size = tf.cast(
      tf.math.ceil((end_location - start_location) / grid_cell_size),
      dtype=tf.int32)
  # Note: all points outside the grid are added to the edges.
  # Cap index at grid_size - 1 (so a 10x10x10 grid's max cell is (9, 9, 9)).
  point_indices = tf.minimum(point_indices,
                             tf.expand_dims(grid_size - 1, axis=0))
  # Don't allow any points below index (0, 0, 0).
  point_indices = tf.maximum(point_indices, 0)
  segment_ids = tf.reduce_sum(
      point_indices * tf.stack(
          [grid_size[1] * grid_size[2], grid_size[2], 1], axis=0),
      axis=1)
  voxel_features = segment_func(
      data=features,
      segment_ids=segment_ids,
      num_segments=(grid_size[0] * grid_size[1] * grid_size[2]))
  return (tf.reshape(voxel_features, [
      grid_size[0], grid_size[1], grid_size[2],
      features.get_shape().as_list()[1]
  ]), segment_ids, point_indices)

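
# --- Usage sketch (added for illustration, not part of the original source).
# Calls `pointcloud_to_voxel_grid` on a tiny made-up cloud; all values are
# assumptions. With a [0, 1]^3 extent and 0.5 cells the grid is 2x2x2, and
# unsorted_segment_sum counts how many points land in each voxel.
import tensorflow as tf

toy_points = tf.constant([[0.1, 0.1, 0.1],
                          [0.9, 0.9, 0.9],
                          [0.15, 0.1, 0.1]])       # [N=3, 3]
toy_features = tf.ones([3, 1], dtype=tf.float32)   # one scalar feature/point
voxel_features, segment_ids, point_indices = pointcloud_to_voxel_grid(
    points=toy_points,
    features=toy_features,
    grid_cell_size=[0.5, 0.5, 0.5],
    start_location=[0.0, 0.0, 0.0],
    end_location=[1.0, 1.0, 1.0],
    segment_func=tf.math.unsorted_segment_sum)
# The two points near the origin share voxel (0, 0, 0), so
# voxel_features[0, 0, 0] == [2.0] and voxel_features[1, 1, 1] == [1.0].
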
def classification_loss_using_mask_iou_func_unbatched(
    embeddings, instance_ids, sampled_embeddings, sampled_instance_ids,
    sampled_class_labels, sampled_logits, similarity_strategy, is_balanced):
  """Classification loss using mask iou.

  Args:
    embeddings: A tf.float32 tensor of size [n, f].
    instance_ids: A tf.int32 tensor of size [n].
    sampled_embeddings: A tf.float32 tensor of size [num_samples, f].
    sampled_instance_ids: A tf.int32 tensor of size [num_samples].
    sampled_class_labels: A tf.int32 tensor of size [num_samples, 1].
    sampled_logits: A tf.float32 tensor of size [num_samples, num_classes].
    similarity_strategy: Defines the method for computing similarity between
      embedding vectors. Possible values are 'dotproduct' and 'distance'.
    is_balanced: If True, the per-voxel losses are re-weighted to have equal
      total weight for foreground vs. background voxels.

  Returns:
    A tf.float32 loss scalar tensor.
  """
  predicted_soft_masks = metric_learning_utils.embedding_centers_to_soft_masks(
      embedding=embeddings,
      centers=sampled_embeddings,
      similarity_strategy=similarity_strategy)
  predicted_masks = tf.cast(
      tf.greater(predicted_soft_masks, 0.5), dtype=tf.float32)
  gt_masks = tf.cast(
      tf.equal(
          tf.expand_dims(sampled_instance_ids, axis=1),
          tf.expand_dims(instance_ids, axis=0)),
      dtype=tf.float32)
  pairwise_iou = instance_segmentation_utils.points_mask_pairwise_iou(
      masks1=predicted_masks, masks2=gt_masks)
  num_classes = sampled_logits.get_shape().as_list()[1]
  sampled_class_labels_one_hot = tf.one_hot(
      indices=tf.reshape(sampled_class_labels, [-1]), depth=num_classes)
  sampled_class_labels_one_hot_fg = sampled_class_labels_one_hot[:, 1:]
  iou_coefs = tf.tile(tf.reshape(pairwise_iou, [-1, 1]), [1, num_classes - 1])
  sampled_class_labels_one_hot_fg *= iou_coefs
  sampled_class_labels_one_hot_bg = tf.maximum(
      1.0 - tf.math.reduce_sum(
          sampled_class_labels_one_hot_fg, axis=1, keepdims=True), 0.0)
  sampled_class_labels_one_hot = tf.concat(
      [sampled_class_labels_one_hot_bg, sampled_class_labels_one_hot_fg],
      axis=1)
  params = {}
  if is_balanced:
    weights = loss_utils.get_balanced_loss_weights_multiclass(
        labels=tf.expand_dims(sampled_instance_ids, axis=1))
    params['weights'] = weights
  return classification_loss_fn(
      logits=sampled_logits,
      labels=sampled_class_labels_one_hot,
      **params)

def state_rewards(states,
                  actions,
                  rewards,
                  next_states,
                  contexts,
                  weight_index=None,
                  state_indices=None,
                  weight_vector=1.0,
                  offset_vector=0.0,
                  summarize=False):
  """Returns the rewards that are a linear mapping of next_states.

  Args:
    states: A [batch_size, num_state_dims] Tensor representing a batch of
      states.
    actions: A [batch_size, num_action_dims] Tensor representing a batch of
      actions.
    rewards: A [batch_size] Tensor representing a batch of rewards.
    next_states: A [batch_size, num_state_dims] Tensor representing a batch of
      next states.
    contexts: A list of [batch_size, num_context_dims] Tensors representing a
      batch of contexts.
    weight_index: (integer) Index of the contexts list that specifies
      weighting.
    state_indices: (a list of Numpy integer arrays) Indices of state
      dimensions to be mapped.
    weight_vector: (a number, a list, or a Numpy array) The weighting vector,
      broadcastable to `next_states`.
    offset_vector: (a number, a list, or a Numpy array) The offset vector.
    summarize: (boolean) enable summary ops.

  Returns:
    A new tf.float32 [batch_size] rewards Tensor, and
      tf.float32 [batch_size] discounts tensor.
  """
  del states, actions, rewards  # Unused args.
  stats = {}
  record_tensor(next_states, state_indices, stats)
  next_states = index_states(next_states, state_indices)
  weight = tf.constant(
      weight_vector, dtype=next_states.dtype, shape=next_states[0].shape)
  weights = tf.expand_dims(weight, 0)
  offset = tf.constant(
      offset_vector, dtype=next_states.dtype, shape=next_states[0].shape)
  offsets = tf.expand_dims(offset, 0)
  if weight_index is not None:
    weights *= contexts[weight_index]
  rewards = tf.to_float(
      tf.reduce_sum(weights * (next_states + offsets), axis=1))
  if summarize:
    with tf.name_scope('RewardFn/'):
      summarize_stats(stats)
  return rewards, tf.ones_like(rewards)

def compute_logits(self, support_embeddings, query_embeddings,
                   onehot_support_labels):
  """Computes the negative distances of each query point to each prototype."""
  # [num test images, 1, embedding size].
  query_embeddings = tf.expand_dims(query_embeddings, 1)

  prototypes = compute_prototypes(support_embeddings, onehot_support_labels)
  # [1, num_classes, embedding_size].
  prototypes = tf.expand_dims(prototypes, 0)

  # Squared euclidean distances between each test embedding / prototype pair.
  distances = tf.reduce_sum(tf.square(query_embeddings - prototypes), 2)
  return -distances

def joint_log_likelihood(self, onehot_labels, log_probs):
  """Compute p(z, y)."""
  labels = tf.cast(
      tf.reduce_sum(input_tensor=onehot_labels, axis=0), dtype=tf.float32)
  class_log_probs = tf.math.log(labels / tf.reduce_sum(input_tensor=labels))
  return log_probs + tf.expand_dims(class_log_probs, 0)

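
# --- Standalone sketch (added for illustration, not part of the original
# source). `joint_log_likelihood` does not use `self`, so its body is
# reproduced inline on made-up values to show the math:
# log p(z, y) = log p(z | y) + log p(y), with p(y) estimated from the one-hot
# label counts.
import tensorflow as tf

onehot_labels = tf.constant([[1., 0.], [1., 0.], [0., 1.]])  # 2 class-0, 1 class-1
log_probs = tf.math.log(tf.constant([[0.5, 0.5]]))           # log p(z | y)
labels = tf.reduce_sum(onehot_labels, axis=0)                # class counts [2, 1]
class_log_probs = tf.math.log(labels / tf.reduce_sum(labels))  # log p(y)
joint = log_probs + tf.expand_dims(class_log_probs, 0)       # log p(z, y)
# joint == [[log 0.5 + log(2/3), log 0.5 + log(1/3)]]
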
def slice_to_max_num_distractors_fn(inputs):
  """Reduces the number of distractors to the max number."""
  label_for_ex, scores_for_ex = inputs

  scores_nocorrect = tf.concat(
      [scores_for_ex[0:label_for_ex], scores_for_ex[(label_for_ex + 1):]],
      axis=0)
  random_start_index = tf.random.uniform(
      shape=[],
      minval=0,
      maxval=scores_for_ex.shape[0] - max_num_dist,
      dtype=tf.int32)

  new_scores = scores_nocorrect[random_start_index:random_start_index +
                                max_num_dist]

  # Put the groundtruth embedding in position 0 to make labels easy.
  new_scores = tf.concat(
      [tf.expand_dims(scores_for_ex[label_for_ex], 0), new_scores], axis=0)

  return new_scores

def train_step(train_img, train_label):
  # Optimize the model.
  loss_value, grads = grad(model, train_img, train_label)
  optimizer.apply_gradients(zip(grads, model.trainable_variables))

  train_pred, _ = model(train_img)
  train_label = tf.expand_dims(train_label, axis=1)
  train_accuracy.update_state(train_label, train_pred)

def build_graph(self):
  """Builds the neural network graph."""
  # Define graph.
  self.g = tf.Graph()
  with self.g.as_default():

    # Create and store a new session for the graph.
    self.sess = tf.Session()

    # Define placeholders.
    self.x = tf.placeholder(shape=[None, self.dim_input], dtype=tf.float32)
    self.y = tf.placeholder(shape=[None, self.num_classes], dtype=tf.float32)

    # Define simple model.
    with tf.variable_scope('last_layer'):
      self.z = tf.layers.dense(inputs=self.x, units=self.num_classes)

    self.loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=self.y, logits=self.z))

    self.output_probs = tf.nn.softmax(self.z)

    # Variables of the last layer.
    self.ll_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    self.ll_vars_concat = tf.concat(
        [self.ll_vars[0], tf.expand_dims(self.ll_vars[1], axis=0)], 0)

    # Summary.
    _variable_summaries(self.ll_vars_concat)

    # Saving the weights of the last layer when running the bootstrap
    # algorithm.
    self.saver = tf.train.Saver(var_list=self.ll_vars)

    self.gd_opt = tf.train.GradientDescentOptimizer(self.step_size)

    # SGD optimizer for the last layer.
    grads_vars_sgd = self.gd_opt.compute_gradients(self.loss)
    self.train_op = self.gd_opt.apply_gradients(grads_vars_sgd)

    for g, v in grads_vars_sgd:
      if g is not None:
        s = list(v.name)
        s[v.name.rindex(':')] = '_'
        tf.summary.histogram(''.join(s) + '/grad_hist_boot_sgd', g)

    # Merge all the summaries and write them out.
    self.all_summaries = tf.summary.merge_all()
    location = os.path.join(self.working_dir, 'logs')
    self.writer = tf.summary.FileWriter(location, graph=self.g)

    saver_network = tf.train.Saver(var_list=self.ll_vars)
    print('Loading the network...')
    # Restores from checkpoint.
    saver_network.restore(self.sess, self.model_dir)
    print('Graph successfully loaded.')

def compute_logits(self, support_embeddings, query_embeddings,
                   onehot_support_labels):
  """Computes the class logits for the episode.

  Args:
    support_embeddings: A Tensor of size [num_support_images, embedding dim].
    query_embeddings: A Tensor of size [num_query_images, embedding dim].
    onehot_support_labels: A Tensor of size [batch size, way].

  Returns:
    The query set logits as a [num_query_images, way] matrix.

  Raises:
    ValueError: Distance must be one of l2 or cosine.
  """
  if self.knn_in_fc:
    # Recompute the support and query embeddings that were originally
    # computed in self.forward_pass() to be the fc layer activations.
    support_embeddings = self.forward_pass_fc(support_embeddings)
    query_embeddings = self.forward_pass_fc(query_embeddings)

  # ------------------------ K-NN look up -------------------------------
  # For each testing example in an episode, we use its embedding
  # vector to look for the closest neighbor in all the training examples'
  # embeddings from the same episode and then assign the training example's
  # class label to the testing example as the predicted class label for it.
  if self.distance == 'l2':
    # [1, num_support, embed_dims].
    support_embeddings = tf.expand_dims(support_embeddings, axis=0)
    # [num_query, 1, embed_dims].
    query_embeddings = tf.expand_dims(query_embeddings, axis=1)
    # [num_query, num_support].
    distance = tf.norm(query_embeddings - support_embeddings, axis=2)
  elif self.distance == 'cosine':
    support_embeddings = tf.nn.l2_normalize(support_embeddings, axis=1)
    query_embeddings = tf.nn.l2_normalize(query_embeddings, axis=1)
    distance = -1 * tf.matmul(
        query_embeddings, support_embeddings, transpose_b=True)
  else:
    raise ValueError('Distance must be one of l2 or cosine.')
  # [num_query].
  _, indices = tf.nn.top_k(-distance, k=1)
  indices = tf.squeeze(indices, axis=1)
  # [num_query, num_classes].
  query_logits = tf.gather(onehot_support_labels, indices)
  return query_logits

def gauss_kernel(x, D, gamma=1.):
  x = tf.expand_dims(x, axis=-1)
  if x.get_shape().ndims < 4:
    D = tf.reshape(D, (1, 1, -1))
  else:
    D = tf.reshape(D, (1, 1, 1, 1, -1))
  return tf.exp(-gamma * tf.square(x - D))

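
# --- Usage sketch (added for illustration, not part of the original source).
# Evaluates `gauss_kernel` on a rank-2 input against three assumed centers;
# each input value gets an RBF response to every center.
import tensorflow as tf

x_demo = tf.constant([[0.0, 1.0]])       # [batch=1, 2]
centers = tf.constant([0.0, 1.0, 2.0])   # 3 kernel centers
k_demo = gauss_kernel(x_demo, centers, gamma=1.)
# k_demo has shape [1, 2, 3]; k_demo[0, 0, 0] == exp(0) == 1.0 because the
# first input value coincides with the first center.
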
def _body_fn(i, indices_range, indices):
  """Computes the indices of the i-th point feature in each segment."""
  indices_i = tf.math.unsorted_segment_max(
      data=indices_range, segment_ids=segment_ids, num_segments=num_segments)
  indices_i_positive_mask = tf.greater(indices_i, 0)
  indices_i_positive = tf.boolean_mask(indices_i, indices_i_positive_mask)
  boolean_mask = tf.scatter_nd(
      indices=tf.cast(
          tf.expand_dims(indices_i_positive - 1, axis=1), dtype=tf.int64),
      updates=tf.ones_like(indices_i_positive, dtype=tf.int32),
      shape=(n,))
  indices_range *= (1 - boolean_mask)
  indices_i *= tf.cast(indices_i_positive_mask, dtype=tf.int32)
  indices_i = tf.pad(
      tf.expand_dims(indices_i, axis=1),
      paddings=[[0, 0], [i, num_samples_per_voxel - i - 1]])
  indices += indices_i
  i = i + 1
  return i, indices_range, indices

def flip_normals_towards_viewpoint(points, normals, viewpoint):
  """Flips the normals to face towards the view point.

  Args:
    points: A tf.float32 tensor of size [N, 3].
    normals: A tf.float32 tensor of size [N, 3].
    viewpoint: A tf.float32 tensor of size [3].

  Returns:
    flipped_normals: A tf.float32 tensor of size [N, 3].
  """
  # (viewpoint - point)
  view_vector = tf.expand_dims(viewpoint, axis=0) - points

  # Dot product between the (viewpoint - point) and the plane normal.
  cos_theta = tf.expand_dims(
      tf.reduce_sum(view_vector * normals, axis=1), axis=1)

  # Revert normals where cos is negative.
  normals *= tf.sign(tf.tile(cos_theta, [1, 3]))

  return normals

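
# --- Usage sketch (added for illustration, not part of the original source).
# One made-up point whose normal points away from the viewpoint; the negative
# dot product triggers the sign flip.
import tensorflow as tf

demo_points = tf.constant([[1.0, 0.0, 0.0]])    # a point on the x-axis
demo_normals = tf.constant([[1.0, 0.0, 0.0]])   # normal facing away from origin
demo_viewpoint = tf.constant([0.0, 0.0, 0.0])   # viewpoint at the origin
flipped = flip_normals_towards_viewpoint(demo_points, demo_normals,
                                         demo_viewpoint)
# view_vector is (-1, 0, 0); its dot product with the normal is negative, so
# the normal is flipped to (-1, 0, 0) to face the viewpoint.
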
def reset_rewards(states,
                  actions,
                  rewards,
                  next_states,
                  contexts,
                  reset_index=0,
                  reset_state=None,
                  reset_reward_function=None,
                  include_forward_rewards=True,
                  include_reset_rewards=True):
  """Returns the rewards for a forward/reset agent.

  Args:
    states: A [batch_size, num_state_dims] Tensor representing a batch of
      states.
    actions: A [batch_size, num_action_dims] Tensor representing a batch of
      actions.
    rewards: A [batch_size] Tensor representing a batch of rewards.
    next_states: A [batch_size, num_state_dims] Tensor representing a batch of
      next states.
    contexts: A list of [batch_size, num_context_dims] Tensors representing a
      batch of contexts.
    reset_index: (integer) The context list index that specifies reset.
    reset_state: Reset state.
    reset_reward_function: Reward function for the reset step.
    include_forward_rewards: Include the rewards from the forward pass.
    include_reset_rewards: Include the rewards from the reset pass.

  Returns:
    A new tf.float32 [batch_size] rewards Tensor, and
      tf.float32 [batch_size] discounts tensor.
  """
  reset_state = tf.constant(
      reset_state, dtype=next_states.dtype, shape=next_states.shape)
  reset_states = tf.expand_dims(reset_state, 0)

  def true_fn():
    if include_reset_rewards:
      return reset_reward_function(states, actions, rewards, next_states,
                                   [reset_states] + contexts[1:])
    else:
      return tf.zeros_like(rewards), tf.ones_like(rewards)

  def false_fn():
    if include_forward_rewards:
      return plain_rewards(states, actions, rewards, next_states, contexts)
    else:
      return tf.zeros_like(rewards), tf.ones_like(rewards)

  rewards, discounts = tf.cond(
      tf.cast(contexts[reset_index][0, 0], dtype=tf.bool), true_fn, false_fn)
  return rewards, discounts

def _prepare_lidar_points(inputs, lidar_names):
  """Integrates and returns the lidar points in vehicle coordinate frame."""
  points_position = []
  points_intensity = []
  points_elongation = []
  points_normal = []
  points_in_image_frame_xy = []
  points_in_image_frame_id = []
  for lidar_name in lidar_names:
    lidar_location = tf.reshape(
        inputs[('lidars/%s/extrinsics/t') % lidar_name], [-1, 3])
    inside_no_label_zone = tf.reshape(
        inputs[('lidars/%s/pointcloud/inside_nlz' % lidar_name)], [-1])
    valid_points_mask = tf.math.logical_not(inside_no_label_zone)
    points_position_current_lidar = tf.boolean_mask(
        inputs[('lidars/%s/pointcloud/positions' % lidar_name)],
        valid_points_mask)
    points_position.append(points_position_current_lidar)
    points_intensity.append(
        tf.boolean_mask(
            inputs[('lidars/%s/pointcloud/intensity' % lidar_name)],
            valid_points_mask))
    points_elongation.append(
        tf.boolean_mask(
            inputs[('lidars/%s/pointcloud/elongation' % lidar_name)],
            valid_points_mask))
    points_to_lidar_vectors = lidar_location - points_position_current_lidar
    points_normal_direction = points_to_lidar_vectors / tf.expand_dims(
        tf.norm(points_to_lidar_vectors, axis=1), axis=1)
    points_normal.append(points_normal_direction)
    points_in_image_frame_xy.append(
        tf.boolean_mask(
            inputs['lidars/%s/camera_projections/positions' % lidar_name],
            valid_points_mask))
    points_in_image_frame_id.append(
        tf.boolean_mask(
            inputs['lidars/%s/camera_projections/ids' % lidar_name],
            valid_points_mask))
  points_position = tf.concat(points_position, axis=0)
  points_intensity = tf.concat(points_intensity, axis=0)
  points_elongation = tf.concat(points_elongation, axis=0)
  points_normal = tf.concat(points_normal, axis=0)
  points_in_image_frame_xy = tf.concat(points_in_image_frame_xy, axis=0)
  points_in_image_frame_id = tf.cast(
      tf.concat(points_in_image_frame_id, axis=0), dtype=tf.int32)
  points_in_image_frame_yx = tf.cast(
      tf.reverse(points_in_image_frame_xy, axis=[-1]), dtype=tf.int32)

  return (points_position, points_intensity, points_elongation, points_normal,
          points_in_image_frame_yx, points_in_image_frame_id)

def identity_knn_graph(points, num_valid_points, k):  # pylint: disable=unused-argument
  """Returns each point as its own neighbor k times.

  Args:
    points: A tf.float32 tensor of size [num_batches, N, D] where D is the
      point dimensions.
    num_valid_points: A tf.int32 tensor of size [num_batches] containing the
      number of valid points in each batch example.
    k: Number of neighbors for each point.

  Returns:
    distances: A tf.float32 tensor of [num_batches, N, k]. Distances is all
      zeros since each point is returned as its own neighbor.
    indices: A tf.int32 tensor of [num_batches, N, k]. Each row will contain
      values that are identical to the index of that row.
  """
  num_batches = points.get_shape()[0]
  num_points = tf.shape(points)[1]
  indices = tf.expand_dims(tf.range(num_points), axis=1)
  indices = tf.tile(tf.expand_dims(indices, axis=0), [num_batches, 1, k])
  distances = tf.zeros([num_batches, num_points, k], dtype=tf.float32)
  return distances, indices

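
# --- Usage sketch (added for illustration, not part of the original source).
# Toy shapes are assumptions; `num_valid_points` is accepted but unused here.
import tensorflow as tf

demo_batch = tf.zeros([2, 4, 3])                  # [num_batches=2, N=4, D=3]
demo_num_valid = tf.constant([4, 4], dtype=tf.int32)
demo_dists, demo_idx = identity_knn_graph(demo_batch, demo_num_valid, k=3)
# demo_dists is all zeros with shape [2, 4, 3]; demo_idx[b, i, :] == [i, i, i]
# for every batch b, i.e. each point is its own neighbor k times.
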
def gauss_kernel2D(x, Dx, Dy, gamma=1.):
  h_size = (x.get_shape()[-1].value) // 2
  x = tf.expand_dims(x, axis=-1)
  if x.get_shape().ndims < 4:
    Dx = tf.reshape(Dx, (1, 1, -1))
    Dy = tf.reshape(Dy, (1, 1, -1))
    x1, x2 = x[:, :h_size], x[:, h_size:]
  else:
    Dy = tf.reshape(Dy, (1, 1, 1, 1, -1))
    Dx = tf.reshape(Dx, (1, 1, 1, 1, -1))
    x1, x2 = x[:, :, :, :h_size], x[:, :, :, h_size:]
  gauss_kernel = tf.exp(-gamma * tf.square(x1 - Dx)) + tf.exp(
      -gamma * tf.square(x2 - Dy))
  return gauss_kernel

def tf_random_choice(inputs, n_samples):
  """Samples rows from `inputs` uniformly, with replacement.

  Params:
    inputs (Tensor): Shape [n_states, n_features].
    n_samples (int): The number of random samples to take.

  Returns:
    sampled_inputs (Tensor): Shape [n_samples, n_features].
  """
  # (1, n_states) since multinomial requires 2D logits.
  uniform_log_prob = tf.expand_dims(tf.zeros(tf.shape(inputs)[0]), 0)

  ind = tf.multinomial(uniform_log_prob, n_samples)
  ind = tf.squeeze(ind, 0, name="random_choice_ind")  # (n_samples,)

  return tf.gather(inputs, ind, name="random_choice")

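
# --- Usage sketch (added for illustration, not part of the original source).
# `tf.multinomial` is TF 1.x API (renamed tf.random.categorical in TF 2.x),
# so this assumes TF 1.x graph mode; the input values are made up.
import tensorflow as tf

demo_states = tf.constant([[0.0, 1.0], [2.0, 3.0], [4.0, 5.0]])  # [n_states=3, 2]
demo_sampled = tf_random_choice(demo_states, n_samples=5)        # [5, 2]
with tf.Session() as sess:
  print(sess.run(demo_sampled))  # 5 rows drawn uniformly, with replacement
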
def _expand_to_population(self, data):
  """Expand the input tensor to a population of replications.

  Args:
    data (tf.Tensor): input data with shape [batch_size, ...]

  Returns:
    data_population (tf.Tensor) with shape
      [batch_size * self._population_size, ...]. For example, for a data
      tensor [[a, b], [c, d]] and a population_size of 2, the output
      data_population tensor is [[a, b], [a, b], [c, d], [c, d]].
  """
  data_population = tf.tile(
      tf.expand_dims(data, 1),
      [1, self._population_size] + [1] * len(data.shape[1:]))
  data_population = tf.reshape(data_population,
                               [-1] + data.shape[1:].as_list())
  return data_population

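
# --- Standalone sketch (added for illustration, not part of the original
# source). Reproduces the `_expand_to_population` replication pattern with
# the population size inlined as a constant, since the class wrapper is not
# shown here.
import tensorflow as tf

population_size = 2
demo_data = tf.constant([[1.0, 2.0], [3.0, 4.0]])   # [batch_size=2, 2]
demo_population = tf.tile(
    tf.expand_dims(demo_data, 1),
    [1, population_size] + [1] * len(demo_data.shape[1:]))
demo_population = tf.reshape(demo_population,
                             [-1] + demo_data.shape[1:].as_list())
# demo_population == [[1., 2.], [1., 2.], [3., 4.], [3., 4.]], matching the
# docstring of `_expand_to_population` above.
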
def compute_semantic_labels(inputs, points_key, box_margin=0.1):
  """Computes ground-truth semantic labels of the points.

  If a point falls inside an object box, it is assigned the label of that
  box. Otherwise the point is assigned to background (unknown), which is
  label 0.

  Args:
    inputs: A dictionary containing points and objects.
    points_key: A string corresponding to the tensor of point positions in
      inputs.
    box_margin: A margin by which object boxes are grown. Useful to make sure
      points on the object box boundary fall inside the object.

  Returns:
    A tf.int32 tensor of size [num_points, 1] containing point semantic
      labels.

  Raises:
    ValueError: If the required object or point keys are not in inputs.
  """
  if points_key not in inputs:
    raise ValueError(('points_key: %s not in inputs.' % points_key))
  if 'objects/shape/dimension' not in inputs:
    raise ValueError('`objects/shape/dimension` not in inputs.')
  if 'objects/pose/R' not in inputs:
    raise ValueError('`objects/pose/R` not in inputs.')
  if 'objects/pose/t' not in inputs:
    raise ValueError('`objects/pose/t` not in inputs.')
  if 'objects/category/label' not in inputs:
    raise ValueError('`objects/category/label` not in inputs.')
  point_positions = inputs[points_key]
  boxes_length = inputs['objects/shape/dimension'][:, 0:1]
  boxes_width = inputs['objects/shape/dimension'][:, 1:2]
  boxes_height = inputs['objects/shape/dimension'][:, 2:3]
  boxes_rotation_matrix = inputs['objects/pose/R']
  boxes_center = inputs['objects/pose/t']
  boxes_label = tf.expand_dims(inputs['objects/category/label'], axis=1)
  boxes_label = tf.pad(boxes_label, paddings=[[1, 0], [0, 0]])
  points_box_index = box_utils.map_points_to_boxes(
      points=point_positions,
      boxes_length=boxes_length,
      boxes_height=boxes_height,
      boxes_width=boxes_width,
      boxes_rotation_matrix=boxes_rotation_matrix,
      boxes_center=boxes_center,
      box_margin=box_margin)
  return tf.gather(boxes_label, points_box_index + 1)

def plot_to_image(figure):
  """Converts the matplotlib plot specified by 'figure' to a PNG image.

  The supplied figure is closed and inaccessible after this call.
  """
  # Save the plot to a PNG in memory.
  buf = io.BytesIO()
  plt.savefig(buf, format='png')
  # Closing the figure prevents it from being displayed directly inside
  # the notebook.
  plt.close(figure)
  buf.seek(0)
  # Convert PNG buffer to TF image.
  image = tf.image.decode_png(buf.getvalue(), channels=4)
  # Add the batch dimension.
  image = tf.expand_dims(image, 0)
  return image

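
# --- Usage sketch (added for illustration, not part of the original source).
# Renders an arbitrary figure and converts it with `plot_to_image`; the
# resulting [1, height, width, 4] uint8 tensor can then be logged, e.g. with
# tf.summary.image in TF 2.x.
import io
import matplotlib.pyplot as plt
import tensorflow as tf

demo_fig = plt.figure()
plt.plot([0, 1, 2], [0, 1, 4])
demo_img = plot_to_image(demo_fig)  # shape [1, height, width, 4]
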
def identity_knn_graph_unbatched(points, k):
  """Returns each point as its own neighbor k times.

  Args:
    points: A tf.float32 tensor of [N, D] where D is the point dimensions.
    k: Number of neighbors for each point.

  Returns:
    distances: A tf.float32 tensor of [N, k]. Distances is all zeros since
      each point is returned as its own neighbor.
    indices: A tf.int32 tensor of [N, k]. Each row will contain values that
      are identical to the index of that row.
  """
  num_points = tf.shape(points)[0]
  indices = tf.expand_dims(tf.range(num_points), axis=1)
  indices = tf.tile(indices, [1, k])
  distances = tf.zeros([num_points, k], dtype=tf.float32)
  return distances, indices

def _box_classification_loss_unbatched(inputs_1, outputs_1, is_intermediate,
                                       is_balanced, mine_hard_negatives,
                                       hard_negative_score_threshold):
  """Loss function for input and outputs of batch size 1."""
  valid_mask = _get_voxels_valid_mask(inputs_1=inputs_1)
  if is_intermediate:
    logits = outputs_1[standard_fields.DetectionResultFields
                       .intermediate_object_semantic_voxels]
  else:
    logits = outputs_1[
        standard_fields.DetectionResultFields.object_semantic_voxels]
  num_classes = logits.get_shape().as_list()[-1]
  if num_classes is None:
    raise ValueError('Number of classes is unknown.')
  logits = tf.boolean_mask(tf.reshape(logits, [-1, num_classes]), valid_mask)
  labels = tf.boolean_mask(
      tf.reshape(
          inputs_1[standard_fields.InputDataFields.object_class_voxels],
          [-1, 1]), valid_mask)
  if mine_hard_negatives or is_balanced:
    instances = tf.boolean_mask(
        tf.reshape(
            inputs_1[
                standard_fields.InputDataFields.object_instance_id_voxels],
            [-1]), valid_mask)
  params = {}
  if mine_hard_negatives:
    negative_scores = tf.reshape(tf.nn.softmax(logits)[:, 0], [-1])
    hard_negative_mask = tf.logical_and(
        tf.less(negative_scores, hard_negative_score_threshold),
        tf.equal(tf.reshape(labels, [-1]), 0))
    hard_negative_labels = tf.boolean_mask(labels, hard_negative_mask)
    hard_negative_logits = tf.boolean_mask(logits, hard_negative_mask)
    hard_negative_instances = tf.boolean_mask(
        tf.ones_like(instances) * (tf.reduce_max(instances) + 1),
        hard_negative_mask)
    logits = tf.concat([logits, hard_negative_logits], axis=0)
    instances = tf.concat([instances, hard_negative_instances], axis=0)
    labels = tf.concat([labels, hard_negative_labels], axis=0)
  if is_balanced:
    weights = loss_utils.get_balanced_loss_weights_multiclass(
        labels=tf.expand_dims(instances, axis=1))
    params['weights'] = weights
  return classification_loss_fn(logits=logits, labels=labels, **params)