def pick_labeled_image(mesh_inputs, view_image_inputs, view_indices_2d_inputs,
                       view_name):
  """Keeps only the image that the most labeled points project onto.

  Each image is scored by the sum of the point loss weights of the points
  whose 2d indices fall inside the image. If every image scores zero, the
  plain count of in-bounds points is used instead. The entries
  `view_image_inputs[view_name]` and `view_indices_2d_inputs[view_name]` are
  overwritten in place with a size-one slice holding the best image.

  Nothing happens if `view_name` is missing from either view dictionary.

  Args:
    mesh_inputs: Dictionary that must contain the point loss weights.
    view_image_inputs: Dictionary of image tensors, indexed here as
      [image, height, width, channel].
    view_indices_2d_inputs: Dictionary of 2d index tensors, indexed here as
      [image, point, (y, x)].
    view_name: Key of the view to process.

  Raises:
    ValueError: If the point loss weights are missing from `mesh_inputs`.
  """
  if (view_name not in view_image_inputs or
      view_name not in view_indices_2d_inputs):
    return
  weights_key = standard_fields.InputDataFields.point_loss_weights
  if weights_key not in mesh_inputs:
    raise ValueError('The key `weights` is missing from mesh_inputs.')

  images = view_image_inputs[view_name]
  indices_2d = view_indices_2d_inputs[view_name]
  image_shape = tf.shape(images)
  height = image_shape[1]
  width = image_shape[2]

  # A point is valid for an image when both coordinates are within bounds.
  y_coords = indices_2d[:, :, 0]
  x_coords = indices_2d[:, :, 1]
  y_in_bounds = tf.logical_and(
      tf.greater_equal(y_coords, 0), tf.less(y_coords, height))
  x_in_bounds = tf.logical_and(
      tf.greater_equal(x_coords, 0), tf.less(x_coords, width))
  in_bounds = tf.cast(
      tf.logical_and(y_in_bounds, x_in_bounds), dtype=tf.float32)

  point_weights = tf.squeeze(mesh_inputs[weights_key], axis=1)
  weighted_score = tf.reduce_sum(in_bounds * point_weights, axis=1)
  # Fall back to counting visible points when no image sees a labeled point.
  score = tf.cond(
      tf.equal(tf.reduce_sum(weighted_score), 0),
      lambda: tf.reduce_sum(in_bounds, axis=1),
      lambda: weighted_score)

  best_image = tf.math.argmax(score)
  view_image_inputs[view_name] = images[best_image:best_image + 1, :, :, :]
  view_indices_2d_inputs[view_name] = indices_2d[
      best_image:best_image + 1, :, :]
def _positions_center_origin(height, width):
  """Builds a grid of image coordinates with the origin at the image center.

  Args:
    height: Number of rows of the grid.
    width: Number of columns of the grid.

  Returns:
    The row and column coordinate grids (meshgrid with 'ij' indexing) stacked
    along a new last axis, each shifted so that the image center is at 0.
  """
  half_height = tf.cast(height, tf.float32) / 2.0 - 0.5
  half_width = tf.cast(width, tf.float32) / 2.0 - 0.5
  rows = tf.range(0.0, height, 1) - half_height
  cols = tf.range(0.0, width, 1) - half_width
  grid = tf.meshgrid(rows, cols, indexing='ij')
  return tf.stack(grid, -1)
def update_state(self, inputs, outputs):
  """Accumulates per-class true positives, scores and ground-truth counts.

  For every class in `self.class_range`, the detections of that class are
  sorted by descending score and each one is compared with the ground-truth
  instance mask it overlaps most. A detection is a true positive when that
  IoU exceeds `self.iou_threshold` and the ground-truth instance has not been
  claimed by a higher-scoring detection. The results are appended to
  `self.tp[c]` and `self.scores[c]`, and `self.num_gt[c]` grows by the number
  of ground-truth instances of the class.

  Args:
    inputs: A dictionary containing input tensors. The ground-truth object
      classes and the per-voxel instance ids are read.
    outputs: A dictionary containing output tensors. The detection scores,
      classes and per-voxel instance masks are read.

  Returns:
    Update op (a `tf.no_op()`; the state lives in the attributes above).
  """
  detections_score = tf.reshape(
      outputs[standard_fields.DetectionResultFields.objects_score], [-1])
  detections_class = tf.reshape(
      outputs[standard_fields.DetectionResultFields.objects_class], [-1])
  num_detections = tf.shape(detections_score)[0]
  detections_instance_mask = tf.reshape(
      outputs[
          standard_fields.DetectionResultFields.instance_segments_voxel_mask],
      [num_detections, -1])
  gt_class = tf.reshape(inputs[standard_fields.InputDataFields.objects_class],
                        [-1])
  num_gt = tf.shape(gt_class)[0]
  gt_voxel_instance_ids = tf.reshape(
      inputs[standard_fields.InputDataFields.object_instance_id_voxels], [-1])
  # One row per ground-truth instance, one column per voxel. Instance ids are
  # shifted by one before the one-hot encoding (presumably id 0 marks voxels
  # that belong to no instance -- TODO confirm).
  gt_instance_masks = tf.transpose(
      tf.one_hot(gt_voxel_instance_ids - 1, depth=num_gt, dtype=tf.float32))
  for c in self.class_range:
    gt_mask_c = tf.equal(gt_class, c)
    num_gt_c = tf.math.reduce_sum(tf.cast(gt_mask_c, dtype=tf.int32))
    # Count the ground truth before the early exit below: instances of a
    # class that received no detections are misses and must still be part of
    # the recall denominator.
    self.num_gt[c] += num_gt_c
    detections_mask_c = tf.equal(detections_class, c)
    num_detections_c = tf.math.reduce_sum(
        tf.cast(detections_mask_c, dtype=tf.int32))
    if num_detections_c == 0:
      continue
    gt_instance_masks_c = tf.boolean_mask(gt_instance_masks, gt_mask_c)
    det_scores_c = tf.boolean_mask(detections_score, detections_mask_c)
    det_instance_mask_c = tf.boolean_mask(detections_instance_mask,
                                          detections_mask_c)
    # top_k over all detections of the class sorts them by descending score.
    det_scores_c, sorted_indices = tf.math.top_k(
        det_scores_c, k=num_detections_c)
    det_instance_mask_c = tf.gather(det_instance_mask_c, sorted_indices)
    tp_c = tf.zeros([num_detections_c], dtype=tf.int32)
    if num_gt_c > 0:
      ious_c = instance_segmentation_utils.points_mask_iou(
          masks1=gt_instance_masks_c, masks2=det_instance_mask_c)
      max_overlap_gt_ids = tf.cast(
          tf.math.argmax(ious_c, axis=0), dtype=tf.int32)
      is_gt_box_detected = tf.zeros([num_gt_c], dtype=tf.int32)
      # Greedy matching in score order: each ground-truth instance can be
      # claimed by at most one detection.
      for i in tf.range(num_detections_c):
        gt_id = max_overlap_gt_ids[i]
        if (ious_c[gt_id, i] > self.iou_threshold and
            is_gt_box_detected[gt_id] == 0):
          tp_c = tf.maximum(
              tf.one_hot(i, num_detections_c, dtype=tf.int32), tp_c)
          is_gt_box_detected = tf.maximum(
              tf.one_hot(gt_id, num_gt_c, dtype=tf.int32),
              is_gt_box_detected)
    self.tp[c] = tf.concat([self.tp[c], tp_c], axis=0)
    self.scores[c] = tf.concat([self.scores[c], det_scores_c], axis=0)
  return tf.no_op()
def change_intensity_range(intensities,
                           threshold=2.5,
                           normalization_factor1=2500.0,
                           normalization_factor2=12.0):
  """Re-ranges intensity values with a piecewise-linear mapping.

  Values at or below `threshold` are kept as they are. The part of a value
  that exceeds `threshold` is divided by `normalization_factor2`. The result
  is then divided by `normalization_factor1` and shifted by -1.

  Args:
    intensities: A tensor containing intensity values. The original
      documentation assumes a range of 0 to around 65000.
    threshold: Value above which intensities are compressed.
    normalization_factor1: Divisor applied to the whole re-ranged signal.
    normalization_factor2: Divisor applied to the excess over `threshold`.

  Returns:
    A tf.float32 tensor with the re-ranged intensity values.
  """
  values = tf.cast(intensities, dtype=tf.float32)
  is_large = tf.cast(tf.greater(values, threshold), dtype=tf.float32)
  kept = values * (1.0 - is_large)
  compressed = (
      (threshold + (values - threshold) / normalization_factor2) * is_large)
  return ((kept + compressed) / normalization_factor1) - 1.0
def _get_random_scaled_resolution(orig_height, orig_width, min_scale,
                                  max_scale, max_strech, probability_strech):
  """Draws a random rescaled resolution, optionally stretched per axis.

  Args:
    orig_height: Original image height.
    orig_width: Original image width.
    min_scale: Lower bound of the base-2 exponent of the scale factor.
    max_scale: Upper bound of the base-2 exponent of the scale factor.
    max_strech: Bound of the base-2 exponent of the extra per-axis factors.
    probability_strech: Probability of applying the per-axis stretch.

  Returns:
    new_height: Scaled (and possibly stretched) height, rounded up, as int32.
    new_width: Scaled (and possibly stretched) width, rounded up, as int32.
    scale: The common scale factor, without the per-axis stretch.
  """
  # Common scale factor shared by both axes.
  scale = 2**tf.random.uniform([],
                               minval=min_scale,
                               maxval=max_scale,
                               dtype=tf.float32)

  def _stretched_scales():
    # Independent extra factors for height and width.
    stretched_height = scale * 2**tf.random.uniform([], -max_strech,
                                                    max_strech)
    stretched_width = scale * 2**tf.random.uniform([], -max_strech,
                                                   max_strech)
    return tf.stack((stretched_height, stretched_width), axis=0)

  def _uniform_scales():
    return tf.stack((scale, scale), axis=0)

  perform_strech = tf.random.uniform([]) < probability_strech
  axis_scales = tf.cond(perform_strech, _stretched_scales, _uniform_scales)

  # Scaled image resolution, rounded up to whole pixels.
  new_height = tf.cast(
      tf.math.ceil(tf.cast(orig_height, tf.float32) * axis_scales[0]),
      tf.int32)
  new_width = tf.cast(
      tf.math.ceil(tf.cast(orig_width, tf.float32) * axis_scales[1]),
      tf.int32)
  return new_height, new_width, scale
def random_rotation(images,
                    flow=None,
                    mask=None,
                    max_rotation=10,
                    not_empty_crop=True):
  """Rotates images (and optionally flow and mask) by a random angle.

  Args:
    images: Image tensor; height and width are read from axes -3 and -2.
    flow: Optional flow tensor rotated by the same angle.
    mask: Optional mask, rotated together with `flow`. It is returned
      untouched when `flow` is None.
    max_rotation: Maximum absolute rotation in degrees.
    not_empty_crop: If True, the results are center-cropped to a smaller
      window with the original aspect ratio, sized from the rotation angle.

  Returns:
    The tuple (images, flow, mask) after rotation and optional cropping.
  """
  angle_radian = tf.random.uniform(
      [], minval=-max_rotation, maxval=max_rotation,
      dtype=tf.float32) * pi / 180.0
  images = rotate(images, angle_radian, is_flow=False, mask=None)

  crop_box = None
  if not_empty_crop:
    image_shape = tf.shape(images)
    h = tf.cast(image_shape[-3], tf.float32)
    w = tf.cast(image_shape[-2], tf.float32)
    wrapped_angle = angle_radian % pi
    cos = tf.math.cos(wrapped_angle)
    sin = tf.math.sin(wrapped_angle)
    # Factor by which the crop window shrinks relative to the original
    # size; the cosine term changes sign once the wrapped angle passes pi/2.
    scale = tf.cond(
        tf.math.less(wrapped_angle, pi / 2.0),
        lambda: tf.math.maximum((w / h) * sin + cos, (h / w) * sin + cos),
        lambda: tf.math.maximum((w / h) * sin - cos, (h / w) * sin - cos))
    new_height = tf.math.floor(h / scale)
    new_width = tf.math.floor(w / scale)
    # Centered crop window, shared by images, flow and mask.
    crop_box = dict(
        offset_height=tf.cast((h - new_height) / 2, tf.int32),
        offset_width=tf.cast((w - new_width) / 2, tf.int32),
        target_height=tf.cast(new_height, tf.int32),
        target_width=tf.cast(new_width, tf.int32))
    images = tf.image.crop_to_bounding_box(images, **crop_box)

  if flow is not None:
    flow, mask = rotate(flow, angle_radian, is_flow=True, mask=mask)
    if not_empty_crop:
      flow = tf.image.crop_to_bounding_box(flow, **crop_box)
      mask = tf.image.crop_to_bounding_box(mask, **crop_box)
  return images, flow, mask
def pointcloud_to_voxel_grid(points,
                             features,
                             grid_cell_size,
                             start_location,
                             end_location,
                             segment_func=tf.math.unsorted_segment_mean):
  """Aggregates point features into a dense voxel grid.

  Points that fall outside the grid are assigned to the nearest edge cell.

  Args:
    points: A tf.float32 tensor of size [N, 3].
    features: A tf.float32 tensor of size [N, F].
    grid_cell_size: A tf.float32 tensor of size [3].
    start_location: A tf.float32 tensor of size [3].
    end_location: A tf.float32 tensor of size [3].
    segment_func: A tensorflow function that operates on segments. Expect one
      of tf.math.unsorted_segment_{min/max/mean/prod/sum}. Defaults to
      tf.math.unsorted_segment_mean.

  Returns:
    voxel_features: A tf.float32 tensor of size
      [grid_x_len, grid_y_len, grid_z_len, F].
    segment_ids: A tf.int32 tensor of IDs for each point indicating which
      (flattened) voxel cell its data was mapped to.
    point_indices: A tf.int32 tensor of size [num_points, 3] containing the
      location of each point in the 3d voxel grid.
  """
  cell_size = tf.convert_to_tensor(grid_cell_size, dtype=tf.float32)
  grid_start = tf.convert_to_tensor(start_location, dtype=tf.float32)
  grid_end = tf.convert_to_tensor(end_location, dtype=tf.float32)

  unclamped_indices = tf.cast(
      (points - tf.expand_dims(grid_start, axis=0)) /
      tf.expand_dims(cell_size, axis=0),
      dtype=tf.int32)
  grid_size = tf.cast(
      tf.math.ceil((grid_end - grid_start) / cell_size), dtype=tf.int32)

  # Clamp to [0, grid_size - 1] so that outliers land on the grid boundary
  # (a 10x10x10 grid's largest cell is (9, 9, 9)).
  point_indices = tf.maximum(
      tf.minimum(unclamped_indices, tf.expand_dims(grid_size - 1, axis=0)), 0)

  # Row-major flattening of the (x, y, z) index.
  strides = tf.stack([grid_size[1] * grid_size[2], grid_size[2], 1], axis=0)
  segment_ids = tf.reduce_sum(point_indices * strides, axis=1)
  num_cells = grid_size[0] * grid_size[1] * grid_size[2]
  flat_voxel_features = segment_func(
      data=features, segment_ids=segment_ids, num_segments=num_cells)

  num_features = features.get_shape().as_list()[1]
  voxel_features = tf.reshape(
      flat_voxel_features,
      [grid_size[0], grid_size[1], grid_size[2], num_features])
  return (voxel_features, segment_ids, point_indices)
def classification_loss_using_mask_iou_func_unbatched(
    embeddings, instance_ids, sampled_embeddings, sampled_instance_ids,
    sampled_class_labels, sampled_logits, similarity_strategy, is_balanced):
  """Classification loss whose targets are softened by the mask IoU.

  The one-hot foreground target of each sample is scaled by the IoU between
  its predicted mask (soft mask thresholded at 0.5) and its ground-truth
  instance mask. The remaining probability mass goes to class 0.

  Args:
    embeddings: A tf.float32 tensor of size [n, f].
    instance_ids: A tf.int32 tensor of size [n].
    sampled_embeddings: A tf.float32 tensor of size [num_samples, f].
    sampled_instance_ids: A tf.int32 tensor of size [num_samples].
    sampled_class_labels: A tf.int32 tensor of size [num_samples, 1].
    sampled_logits: A tf.float32 tensor of size [num_samples, num_classes].
    similarity_strategy: Defines the method for computing similarity between
      embedding vectors. Possible values are 'dotproduct' and 'distance'.
    is_balanced: If True, balanced loss weights computed from the sampled
      instance ids are passed to the loss function.

  Returns:
    A tf.float32 loss scalar tensor.
  """
  soft_masks = metric_learning_utils.embedding_centers_to_soft_masks(
      embedding=embeddings,
      centers=sampled_embeddings,
      similarity_strategy=similarity_strategy)
  predicted_masks = tf.cast(tf.greater(soft_masks, 0.5), dtype=tf.float32)
  # Row i marks the points that share the instance id of sample i.
  gt_masks = tf.cast(
      tf.equal(
          tf.expand_dims(sampled_instance_ids, axis=1),
          tf.expand_dims(instance_ids, axis=0)),
      dtype=tf.float32)
  pairwise_iou = instance_segmentation_utils.points_mask_pairwise_iou(
      masks1=predicted_masks, masks2=gt_masks)

  num_classes = sampled_logits.get_shape().as_list()[1]
  one_hot_labels = tf.one_hot(
      indices=tf.reshape(sampled_class_labels, [-1]), depth=num_classes)
  iou_coefs = tf.tile(
      tf.reshape(pairwise_iou, [-1, 1]), [1, num_classes - 1])
  # Foreground targets are scaled by the IoU; class 0 absorbs the rest.
  foreground_targets = one_hot_labels[:, 1:] * iou_coefs
  background_targets = tf.maximum(
      1.0 - tf.math.reduce_sum(foreground_targets, axis=1, keepdims=True),
      0.0)
  soft_labels = tf.concat([background_targets, foreground_targets], axis=1)

  if is_balanced:
    weights = loss_utils.get_balanced_loss_weights_multiclass(
        labels=tf.expand_dims(sampled_instance_ids, axis=1))
    return classification_loss_fn(
        logits=sampled_logits, labels=soft_labels, weights=weights)
  return classification_loss_fn(logits=sampled_logits, labels=soft_labels)
def _prepare_lidar_points(inputs, lidar_names):
  """Merges the points of several lidars, dropping no-label-zone points.

  Args:
    inputs: Dictionary of input tensors keyed by 'lidars/<name>/...'.
    lidar_names: Iterable of lidar names to merge.

  Returns:
    A tuple (positions, intensities, elongations, normals,
    image_frame_yx, image_frame_ids), each concatenated over all lidars along
    axis 0. The normals are unit vectors pointing from each point to its
    lidar. The image frame coordinates are reversed to (y, x) order and cast
    to int32, and the ids are cast to int32.
  """
  positions = []
  intensities = []
  elongations = []
  normals = []
  image_frame_xy = []
  image_frame_ids = []

  for lidar_name in lidar_names:
    lidar_location = tf.reshape(
        inputs[('lidars/%s/extrinsics/t') % lidar_name], [-1, 3])
    inside_nlz = tf.reshape(
        inputs[('lidars/%s/pointcloud/inside_nlz' % lidar_name)], [-1])
    keep_mask = tf.math.logical_not(inside_nlz)

    def _select(key_template, name=lidar_name, mask=keep_mask):
      """Returns the entries of one per-point tensor that pass the mask."""
      return tf.boolean_mask(inputs[key_template % name], mask)

    lidar_positions = _select('lidars/%s/pointcloud/positions')
    positions.append(lidar_positions)
    intensities.append(_select('lidars/%s/pointcloud/intensity'))
    elongations.append(_select('lidars/%s/pointcloud/elongation'))

    # The normal is approximated by the unit vector towards the sensor.
    to_lidar = lidar_location - lidar_positions
    normals.append(
        to_lidar / tf.expand_dims(tf.norm(to_lidar, axis=1), axis=1))

    image_frame_xy.append(_select('lidars/%s/camera_projections/positions'))
    image_frame_ids.append(_select('lidars/%s/camera_projections/ids'))

  points_position = tf.concat(positions, axis=0)
  points_intensity = tf.concat(intensities, axis=0)
  points_elongation = tf.concat(elongations, axis=0)
  points_normal = tf.concat(normals, axis=0)
  points_in_image_frame_xy = tf.concat(image_frame_xy, axis=0)
  points_in_image_frame_id = tf.cast(
      tf.concat(image_frame_ids, axis=0), dtype=tf.int32)
  points_in_image_frame_yx = tf.cast(
      tf.reverse(points_in_image_frame_xy, axis=[-1]), dtype=tf.int32)
  return (points_position, points_intensity, points_elongation, points_normal,
          points_in_image_frame_yx, points_in_image_frame_id)
def convert_to_simclr_episode(support_images=None,
                              support_labels=None,
                              support_class_ids=None,
                              query_images=None,
                              query_labels=None,
                              query_class_ids=None):
  """Convert a single episode into a SimCLR Episode.

  The new query set is built from support images: if the original query set
  holds k examples of class c, the first k support examples of class c are
  kept. Every support image receives its own index as its new label, and the
  selected query images inherit the label of the support image they come
  from. The incoming `query_images` and `query_class_ids` are not used.

  Returns:
    The tuple (support_images, support_labels, support_class_ids,
    query_images, query_labels, query_class_ids) of the converted episode.
  """

  def _unique_ids(labels):
    """Returns label * 10000 + (1-based rank of the image in its class)."""
    same_label = tf.cast(
        tf.equal(labels[tf.newaxis, :], labels[:, tf.newaxis]), tf.int32)
    # Entry i of the diagonal of the row-wise cumulative sum counts the
    # images up to and including i that share the label of image i.
    rank_within_class = tf.linalg.diag_part(tf.cumsum(same_label, axis=1))
    # note: assumes max support label is 10000 - max_images_per_class
    return labels * 10000 + rank_within_class

  query_uid = _unique_ids(query_labels)
  support_uid = _unique_ids(support_labels)

  # A support image is kept when a query image carries the same unique id,
  # i.e. the same class and the same rank within that class.
  support_keep = tf.reduce_any(
      tf.equal(support_uid[:, tf.newaxis], query_uid[tf.newaxis, :]), axis=1)

  simclr_query_images = tf.boolean_mask(support_images, support_keep)
  simclr_support_labels = tf.range(
      tf.shape(support_labels)[0], dtype=support_labels.dtype)
  simclr_query_labels = tf.boolean_mask(simclr_support_labels, support_keep)
  simclr_query_class_ids = tf.boolean_mask(support_class_ids, support_keep)

  # Finally, apply SimCLR augmentation to all images.
  # Note simclr only blurs one image.
  simclr_query_images = simclr_augment(simclr_query_images, blur=True)
  simclr_support_images = simclr_augment(support_images)

  return (simclr_support_images, simclr_support_labels, support_class_ids,
          simclr_query_images, simclr_query_labels, simclr_query_class_ids)
def learning_rate_schedule_noam(train_steps,
                                warmup_steps=10000,
                                linear_decay_fraction=0.1,
                                multiplier=1.0):
  """Noam's favorite learning-rate schedule.

  (rsqrt(max(step_num, warmup_steps))
   * multiplier
   * min(1.0, (train_steps-step_num)/(train_steps*linear_decay_fraction)))

  The last factor is only applied when `linear_decay_fraction` is positive.
  The current step is read from the global step.

  Args:
    train_steps: a number
    warmup_steps: a number
    linear_decay_fraction: a number
    multiplier: a number

  Returns:
    a tf.scalar
  """
  total_steps = float(train_steps)
  step_num = tf.cast(tf.get_global_step(), tf.float32)
  # Constant rate during warmup, inverse square root decay afterwards.
  learning_rate = tf.math.rsqrt(tf.maximum(step_num, warmup_steps))
  learning_rate *= multiplier
  if linear_decay_fraction > 0:
    # Linear ramp to zero over the last fraction of training.
    decay_ramp = (total_steps - step_num) / (
        total_steps * linear_decay_fraction)
    learning_rate *= tf.minimum(1.0, decay_ramp)
  return learning_rate
def _compute_prototype_loss(self,
                            embeddings,
                            labels,
                            labels_one_hot,
                            prototypes=None):
  """Computes the loss and accuracy on an episode.

  Logits are the dot products between L2-normalized embeddings and
  L2-normalized prototypes.

  Args:
    embeddings: Example embeddings, indexed as [num examples, embedding size].
    labels: Dense labels, forwarded to `self.compute_accuracy`.
    labels_one_hot: One-hot labels, indexed as [num examples, num classes].
    prototypes: Optional class prototypes. When None, they are computed as
      the per-class mean of `embeddings`.

  Returns:
    The tuple (loss, accuracy, L2-normalized prototypes, logits).
  """
  if prototypes is None:
    # [num examples, num classes, 1] class membership indicator.
    membership = tf.expand_dims(tf.cast(labels_one_hot, tf.float32), 2)
    # Broadcasting against [num examples, 1, embedding size] and summing
    # over the examples gives per-class sums [num classes, embedding size].
    class_sums = tf.reduce_sum(membership * tf.expand_dims(embeddings, 1), 0)
    class_counts = tf.reduce_sum(membership, 0)  # [num classes, 1].
    prototypes = class_sums / class_counts

  normalized_embeddings = tf.nn.l2_normalize(embeddings, 1, epsilon=1e-3)
  prototypes = tf.nn.l2_normalize(prototypes, 1, epsilon=1e-3)
  logits = tf.matmul(normalized_embeddings, prototypes, transpose_b=True)

  loss = self.compute_loss(labels_one_hot, logits)
  acc = tf.reduce_mean(self.compute_accuracy(labels, logits))
  return loss, acc, prototypes, logits
def dqn_template(state, num_actions, layer_size=512, num_layers=1):
  r"""Builds a DQN Network mapping states to Q-values.

  The state is cast to float32 and its axis 2 is squeezed away before it is
  fed through `num_layers` ReLU layers and a linear output layer. Only the
  output layer uses the variance scaling initializer.

  Args:
    state: A `tf.placeholder` for the RL state.
    num_actions: int, number of actions that the RL agent can take.
    layer_size: int, number of hidden units per layer.
    num_layers: int, Number of hidden layers.

  Returns:
    net: A `tf.Graphdef` for DQN:
      `\theta : \mathcal{X}\rightarrow\mathbb{R}^{|\mathcal{A}|}`
  """
  net = tf.squeeze(tf.cast(state, tf.float32), axis=2)
  for _ in range(num_layers):
    net = slim.fully_connected(net, layer_size, activation_fn=tf.nn.relu)
  output_initializer = slim.variance_scaling_initializer(
      factor=1.0 / np.sqrt(3.0), mode='FAN_IN', uniform=True)
  return slim.fully_connected(
      net,
      num_actions,
      activation_fn=None,
      weights_initializer=output_initializer)
def _build_target_distribution(self):
  """Builds the projected target distribution for the replayed batch.

  The support is shifted by the reward and scaled by the discount (zero for
  terminal transitions). The next-state distribution of the greedy legal
  action is then projected back onto the fixed support.

  Shape notes below assume `self._replay_next_logits` is
  [batch_size, num_actions, num_atoms], as the original comments state --
  TODO confirm.

  Returns:
    The result of `project_distribution` for the shifted support, the
    selected next-state probabilities and `self.support`.
  """
  self._reshape_networks()
  batch_size = tf.shape(self._replay.rewards)[0]

  # batch_size x 1
  rewards = self._replay.rewards[:, None]

  # batch_size x num_atoms: one copy of the support per transition.
  tiled_support = tf.reshape(
      tf.tile(self.support, [batch_size]), [batch_size, self.num_atoms])

  # Fold the terminal flag into the discount factor. batch_size x 1
  not_terminal = 1. - tf.cast(self._replay.terminals, tf.float32)
  gamma_with_terminal = (self.cumulative_gamma * not_terminal)[:, None]

  # batch_size x num_atoms
  target_support = rewards + gamma_with_terminal * tiled_support

  # batch_size x num_actions x num_atoms
  next_probabilities = tf.contrib.layers.softmax(self._replay_next_logits)

  # Expected value of each action: batch_size x num_actions
  next_qt = tf.reduce_sum(self.support * next_probabilities, 2)

  # Greedy action after adding `next_legal_actions` (presumably a large
  # negative penalty on illegal actions -- verify). batch_size x 1
  greedy_actions = tf.argmax(
      next_qt + self._replay.next_legal_actions, axis=1)[:, None]

  # (batch index, action) pairs for gather_nd: batch_size x 2
  batch_indices = tf.range(tf.to_int64(batch_size))[:, None]
  gather_indices = tf.concat([batch_indices, greedy_actions], axis=1)

  # batch_size x num_atoms
  greedy_probabilities = tf.gather_nd(next_probabilities, gather_indices)

  return project_distribution(target_support, greedy_probabilities,
                              self.support)
def rainbow_template(state,
                     num_actions,
                     num_atoms=51,
                     layer_size=512,
                     num_layers=2):  # FIXME: Aron 3/14/19: changed from 1 to 2
  r"""Builds a Rainbow Network mapping states to value distributions.

  The state is cast to float32 and its axis 2 is squeezed away before it is
  fed through `num_layers` ReLU layers and a linear output layer. Only the
  output layer uses the variance scaling initializer.

  Args:
    state: A `tf.placeholder` for the RL state.
    num_actions: int, number of actions that the RL agent can take.
    num_atoms: int, number of atoms to approximate the distribution with.
    layer_size: int, number of hidden units per layer.
    num_layers: int, number of hidden layers.

  Returns:
    net: A `tf.Graphdef` for Rainbow:
      `\theta : \mathcal{X}\rightarrow\mathbb{R}^{|\mathcal{A}| \times N}`,
      where `N` is num_atoms.
  """
  net = tf.squeeze(tf.cast(state, tf.float32), axis=2)
  for _ in range(num_layers):
    net = slim.fully_connected(net, layer_size, activation_fn=tf.nn.relu)
  output_initializer = slim.variance_scaling_initializer(
      factor=1.0 / np.sqrt(3.0), mode='FAN_IN', uniform=True)
  flat_logits = slim.fully_connected(
      net,
      num_actions * num_atoms,
      activation_fn=None,
      weights_initializer=output_initializer)
  # One row of atom logits per action.
  return tf.reshape(flat_logits, [-1, num_actions, num_atoms])
def train_complete(self,
                   tape: tf.GradientTape,
                   training_info: TrainingInfo,
                   weight=1.0):
  """Complete one iteration of training.

  Builds a mask that is 1.0 for every step whose type is not
  `StepType.LAST` and 0.0 otherwise, then delegates to the parent class.

  Args:
    tape (tf.GradientTape): the tape which are used for calculating
      gradient. All the previous `train_interval` `train_step()` for
      are called under the context of this tape.
    training_info (TrainingInfo): information collected for training.
      training_info.info are the batched from each policy_step.info
      returned by train_step()
    weight (float): weight for this batch. Loss will be multiplied with
      this weight before calculating gradient

  Returns:
    Whatever the parent `train_complete` returns. The original
    documentation describes it as a tuple of the following:
    loss_info (LossInfo): loss information
    grads_and_vars (list[tuple]): list of gradient and variable tuples
  """
  is_not_last_step = tf.not_equal(training_info.step_type, StepType.LAST)
  valid_masks = tf.cast(is_not_last_step, tf.float32)
  return super().train_complete(tape, training_info, valid_masks, weight)
def compute_class_distances(self, support_embeddings, onehot_support_labels,
                            query_embeddings):
  """Returns class log-probabilities from attention over support examples.

  Args:
    support_embeddings: Tensor of examples of shape [num_examples,
      embedding_dim] or [num_examples, spatial_dim, spatial_dim, num_filters].
    onehot_support_labels: Tensor of targets of shape [num_examples,
      num_classes].
    query_embeddings: Tensor of examples of shape [num_examples,
      embedding_dim] or [num_examples, spatial_dim, spatial_dim, num_filters].

  Returns:
    Class log-probabilities computed as a weighted sum of one-hot encoded
    training labels. The weights are a softmax over `1 - distance` between
    each query and each support example, with the distance given by
    `self.distance_metric`.
  """
  # [num_query_images, num_support_images]
  distances = self.distance_metric(query_embeddings, support_embeddings)
  attention = tf.nn.softmax(1 - distances)
  support_labels = tf.cast(onehot_support_labels, dtype=tf.float32)
  # [num_query_images, way]
  class_probs = tf.matmul(attention, support_labels)
  return tf.math.log(class_probs)
def joint_log_likelihood(self, onehot_labels, log_probs):
  """Compute p(z, y).

  Adds the log of the empirical class frequencies (taken from the column
  sums of `onehot_labels`) to `log_probs`, broadcasting over the first axis.

  Args:
    onehot_labels: One-hot labels; the class counts are summed over axis 0.
    log_probs: Log-probabilities the class log-priors are added to.

  Returns:
    `log_probs` shifted by the per-class log-prior.
  """
  class_counts = tf.cast(
      tf.reduce_sum(input_tensor=onehot_labels, axis=0), dtype=tf.float32)
  class_priors = class_counts / tf.reduce_sum(input_tensor=class_counts)
  log_priors = tf.math.log(class_priors)
  return log_probs + tf.expand_dims(log_priors, 0)
def _compute_prototypes(embeddings, labels):
  """Computes class prototypes over the last dimension of embeddings.

  Args:
    embeddings: Tensor of examples of shape [num_examples, embedding_size].
    labels: Tensor of one-hot encoded labels of shape [num_examples,
      num_classes].

  Returns:
    prototypes: Tensor of class prototypes of shape [num_classes,
      embedding_size], each the mean embedding of the class examples.
  """
  # [num examples, num classes, 1] class membership indicator.
  membership = tf.expand_dims(tf.cast(labels, tf.float32), 2)
  # [num examples, 1, embedding size].
  expanded_embeddings = tf.expand_dims(embeddings, 1)
  # Per-class sum of embeddings: [num classes, embedding size].
  class_sums = tf.reduce_sum(membership * expanded_embeddings, 0)
  # Number of examples per class: [num classes, 1].
  class_counts = tf.reduce_sum(membership, 0)
  return class_sums / class_counts
def compute_logits(self, support_embeddings, query_embeddings,
                   onehot_support_labels):
  """Computes the class logits.

  Probabilities are computed as a weighted sum of one-hot encoded training
  labels. Weights for individual support/query pairs of examples are
  proportional to the (potentially semi-normalized) cosine distance between
  the embeddings of the two examples. The returned values are the logarithm
  of those probabilities.

  Args:
    support_embeddings: A Tensor of size [num_support_images, embedding dim].
    query_embeddings: A Tensor of size [num_query_images, embedding dim].
    onehot_support_labels: A Tensor of size [batch size, way].

  Returns:
    The query set logits as a [num_query_images, way] matrix.
  """
  # Undocumented in the paper, but *very important*: *only* the support set
  # embeddings is L2-normalized, which means that the distance is not exactly
  # a cosine distance. For comparison we also allow for the actual cosine
  # distance to be computed, which is controlled with the
  # `exact_cosine_distance` instance attribute.
  support_embeddings = tf.nn.l2_normalize(support_embeddings, 1, epsilon=1e-3)
  if self.exact_cosine_distance:
    query_embeddings = tf.nn.l2_normalize(query_embeddings, 1, epsilon=1e-3)
  # [num_query_images, num_support_images]
  similarities = tf.matmul(
      query_embeddings, support_embeddings, transpose_b=True)
  attention = tf.nn.softmax(similarities)

  # [num_query_images, way]
  probs = tf.matmul(attention, tf.cast(onehot_support_labels, tf.float32))
  # `tf.math.log` is used instead of the top-level alias `tf.log`, which
  # newer TensorFlow releases no longer export.
  return tf.math.log(probs)
def fit_gaussian(embeddings, damping=1e-7, full_covariance=False):
  """Fits a unimodal Gaussian distribution to `embeddings`.

  Args:
    embeddings: A [batch_size, embedding_dim] tf.Tensor of embeddings.
    damping: The scale of the covariance damping coefficient.
    full_covariance: Whether to use a full or diagonal covariance.

  Returns:
    Parameter estimates for a Gaussian model. With `full_covariance` the
    result is (sample mean, damped sample covariance matrix); otherwise it is
    (sample mean, log of the damped per-dimension variances).
  """
  if full_covariance:
    num, dim = tf.split(tf.shape(input=embeddings), num_or_size_splits=2)
    num, dim = tf.squeeze(num), tf.squeeze(dim)
    sample_mean = tf.reduce_mean(input_tensor=embeddings, axis=0)
    centered_embeddings = embeddings - sample_mean
    sample_covariance = tf.einsum('ij,ik->kj', centered_embeddings,
                                  centered_embeddings)  # Outer product.
    sample_covariance += damping * tf.eye(dim)  # Positive definiteness.
    sample_covariance /= tf.cast(num, dtype=tf.float32)  # Scale by N.
    return sample_mean, sample_covariance

  # `axes` is a required argument of `tf.nn.moments`. The statistics are
  # taken over the batch axis, like the mean in the branch above.
  sample_mean, sample_variances = tf.nn.moments(x=embeddings, axes=[0])
  log_variances = tf.math.log(sample_variances +
                              damping * tf.ones_like(sample_variances))
  return sample_mean, log_variances
def compute_pointcloud_weights_based_on_voxel_density(points, grid_cell_size):
  """Computes pointcloud weights based on voxel density.

  A feature of 1 per point is summed within each voxel and mapped back to
  the points. The weight of a point is the inverse of that value, rescaled
  so that the weights sum to the number of points.

  Args:
    points: A tf.float32 tensor of size [num_points, 3].
    grid_cell_size: The size of the grid cells in x, y, z dimensions in the
      voxel grid. It should be either a tf.float32 tensor, a numpy array or a
      list of size [3].

  Returns:
    A tf.float32 tensor of size [num_points, 1] containing weights that are
    inversely proportional to the density of the points in voxels.
  """
  num_points = tf.shape(points)[0]
  unit_features = tf.ones([num_points, 1], dtype=tf.float32)
  voxel_counts, _, segment_ids, _ = pointcloud_to_sparse_voxel_grid_unbatched(
      points=points,
      features=unit_features,
      grid_cell_size=grid_cell_size,
      segment_func=tf.math.unsorted_segment_sum)
  num_voxels = tf.shape(voxel_counts)[0]

  # The voxel-to-point helper is called with a leading batch axis of size 1,
  # which is squeezed away again below.
  batched_point_counts = sparse_voxel_grid_to_pointcloud(
      voxel_features=tf.expand_dims(voxel_counts, axis=0),
      segment_ids=tf.expand_dims(segment_ids, axis=0),
      num_valid_voxels=tf.expand_dims(num_voxels, axis=0),
      num_valid_points=tf.expand_dims(num_points, axis=0))
  inverse_densities = 1.0 / tf.squeeze(batched_point_counts, axis=0)
  inverse_density_sum = tf.reduce_sum(inverse_densities)
  return (inverse_densities * tf.cast(num_points, dtype=tf.float32) /
          inverse_density_sum)
def sdtw_loss(y_hat, y, gamma=0.01):
  """Computes a soft-DTW loss together with its gradient function.

  The heavy lifting is done in Python through `tf.py_func`, in float64.

  Args:
    y_hat: Predicted sequences (cast to float64).
    y: Target sequences (cast to float64).
    gamma: Value forwarded to `batch_sdtw_loss` and `batch_sdtw_grad`
      (presumably the soft-min smoothing -- TODO confirm).

  Returns:
    loss: A float32 scalar, the batch mean of the selected entry of the
      matrix returned by `batch_sdtw_loss`.
    grad: A function of the upstream gradient that returns the gradient with
      respect to `y_hat` and a zero gradient for `y`.
  """
  y_hat = tf.cast(y_hat, tf.float64)
  y = tf.cast(y, tf.float64)
  distances = batched_euclidean_distance(y, y_hat)
  accumulated = tf.py_func(
      batch_sdtw_loss, inp=[distances, gamma], Tout=tf.float64)

  # NOTE(review): the same index is used on both axes, which looks like it
  # assumes both sequences have equal length -- confirm.
  last = distances.get_shape()[1]
  loss = tf.reduce_mean(accumulated[:, last, last])
  # py_func outputs carry no static shape; the loss is a scalar.
  loss.set_shape(shape=())
  loss = tf.cast(loss, dtype=tf.float32)

  def grad(dy):
    # NOTE(review): the upstream gradient `dy` is not applied -- confirm
    # that this is intended.
    y_hat_grad = tf.py_func(
        batch_sdtw_grad,
        inp=[y_hat, y, distances, accumulated, gamma],
        Tout=tf.float64)
    return (tf.cast(y_hat_grad, dtype=tf.float32),
            tf.zeros_like(y, dtype=tf.float32))

  return loss, grad
def _points_to_voxel_indices(points, grid_cell_size):
  """Converts points into corresponding voxel indices.

  The grid starts at the per-axis minimum of `points`. Each point gets an
  x, y, z voxel index and a single number index, both produced by
  `compute_pooled_voxel_indices` with a pooling size of (1, 1, 1).

  Args:
    points: A tf.float32 tensor of size [N, 3].
    grid_cell_size: The size of the grid cells in x, y, z dimensions in the
      voxel grid. It should be either a tf.float32 tensor, a numpy array or a
      list of size [3].

  Returns:
    voxel_xyz_indices: A tf.int32 tensor of size [N, 3] containing the x, y, z
      index of the voxel corresponding to each given point.
    voxel_single_number_indices: A tf.int32 tensor of size [N] containing the
      single number index of the voxel corresponding to each given point.
    voxel_start_location: A tf.float32 tensor of size [3] containing the start
      location of the voxels.
  """
  voxel_start_location = tf.reduce_min(points, axis=0)
  offsets = points - voxel_start_location
  cell_indices = tf.cast(
      tf.math.floordiv(offsets, grid_cell_size), dtype=tf.int32)
  xyz_indices, single_number_indices = compute_pooled_voxel_indices(
      voxel_xyz_indices=cell_indices, pooling_size=(1, 1, 1))
  return xyz_indices, single_number_indices, voxel_start_location
def _pad_or_clip_voxels(voxel_features, voxel_indices, num_valid_voxels, segment_ids, voxels_pad_or_clip_size): """Pads or clips voxels.""" if voxels_pad_or_clip_size: num_valid_voxels = tf.minimum(num_valid_voxels, voxels_pad_or_clip_size) num_channels = voxel_features.get_shape().as_list()[-1] if len(voxel_features.shape.as_list()) == 2: output_shape = [voxels_pad_or_clip_size, num_channels] elif len(voxel_features.shape.as_list()) == 3: num_samples_per_voxel = voxel_features.get_shape().as_list()[1] if num_samples_per_voxel is None: num_samples_per_voxel = tf.shape(voxel_features)[1] output_shape = [ voxels_pad_or_clip_size, num_samples_per_voxel, num_channels ] else: raise ValueError('voxel_features should be either rank 2 or 3.') voxel_features = shape_utils.pad_or_clip_nd( tensor=voxel_features, output_shape=output_shape) voxel_indices = shape_utils.pad_or_clip_nd( tensor=voxel_indices, output_shape=[voxels_pad_or_clip_size, 3]) valid_segment_ids_mask = tf.cast( tf.less(segment_ids, num_valid_voxels), dtype=tf.int32) segment_ids *= valid_segment_ids_mask return voxel_features, voxel_indices, num_valid_voxels, segment_ids
def _filter_valid_objects(inputs):
  """Removes the objects that do not contain 3d info.

  An object is kept when its class is greater than 0 and, for each of the
  optional fields that is present, it has 3d info and its difficulty is
  greater than 0. Every key of `_OBJECT_KEYS` found in `inputs` is filtered
  in place. Nothing happens when the object classes are missing.

  Args:
    inputs: A dictionary containing input tensors.
  """
  fields = standard_fields.InputDataFields
  if fields.objects_class not in inputs:
    return

  keep_mask = tf.reshape(tf.greater(inputs[fields.objects_class], 0), [-1])
  if fields.objects_has_3d_info in inputs:
    has_3d_info = tf.reshape(
        tf.cast(inputs[fields.objects_has_3d_info], dtype=tf.bool), [-1])
    keep_mask = tf.logical_and(has_3d_info, keep_mask)
  if fields.objects_difficulty in inputs:
    difficulty = tf.reshape(inputs[fields.objects_difficulty], [-1])
    keep_mask = tf.logical_and(keep_mask, tf.greater(difficulty, 0))

  for key in _OBJECT_KEYS:
    if key in inputs:
      inputs[key] = tf.boolean_mask(inputs[key], keep_mask)
def __call__(self, example_string):
    """Turns one serialized example into a preprocessed image.

    Only the image is extracted; the label is ignored. The image is assumed
    to have three channels.

    Args:
      example_string: str, an Example protocol buffer.

    Returns:
      The float32 image, resized to `image_size x image_size` and rescaled to
      [-1, 1]. Gaussian noise augmentation, when enabled, may push values
      outside this range.
    """
    size = self.image_size
    augmentation = self.data_augmentation

    decoded = read_example_and_parse_image(example_string)['image']
    resized = tf.cast(
        tf.image.resize_images(
            decoded, [size, size],
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=True),
        tf.float32)
    # Rescale to [-1, 1] (presumes pixel values in [0, 255]).
    image = 2 * (resized / 255.0 - 0.5)

    if augmentation is None:
        return image

    if augmentation.enable_gaussian_noise:
        noise = tf.random_normal(tf.shape(image))
        image = image + noise * augmentation.gaussian_noise_std

    if augmentation.enable_jitter:
        # Reflect-pad the two spatial axes, then crop back to the target
        # size at a random location.
        pad = augmentation.jitter_amount
        paddings = tf.constant([[pad, pad], [pad, pad], [0, 0]])
        image = tf.pad(image, paddings, 'REFLECT')
        image = tf.image.random_crop(image, [size, size, 3])

    return image
def _center_crop(images, height, width):
    """Crops a centered window of size `height` x `width` out of `images`.

    The current height and width are read from axes -3 and -2 of the dynamic
    shape of `images`.

    Args:
      images: Tensor to crop.
      height: Target height; cast to int32.
      width: Target width; cast to int32.

    Returns:
      The result of `tf.image.crop_to_bounding_box` for the centered window.
    """
    target_height = tf.cast(height, tf.int32)
    target_width = tf.cast(width, tf.int32)

    shape = tf.shape(images)
    # Half of the surplus on each axis is the offset of the window.
    top = tf.cast((shape[-3] - target_height) / 2, tf.int32)
    left = tf.cast((shape[-2] - target_width) / 2, tf.int32)

    return tf.image.crop_to_bounding_box(
        images, top, left, target_height, target_width)
def compute_target_topk_q(reward, gamma, next_actions, next_q_values,
                          next_states, terminals):
    """Computes the optimal target Q value with the greedy algorithm.

    This algorithm corresponds to the method "TT" in Ie et al.
    https://arxiv.org/abs/1905.12767.

    Args:
      reward: [batch_size] tensor, the immediate reward.
      gamma: float, discount factor with the usual RL meaning.
      next_actions: [batch_size, slate_size] tensor, the next slate. Only its
        static second dimension (the slate size) is used.
      next_q_values: [batch_size, num_of_documents] tensor, the q values of the
        documents in the next step.
      next_states: [batch_size, 1 + num_of_documents] tensor, the features for
        the user and the documents in the next step.
      terminals: [batch_size] tensor, indicating if this is a terminal step.

    Returns:
      [batch_size] tensor, the target q values.
    """
    slate_size = next_actions.get_shape().as_list()[1]
    scores, score_no_click = _get_unnormalized_scores(next_states)

    # Fill the slate greedily with the documents that have the largest
    # score * Q value and treat that slate as the optimal one.
    _, top_indices = tf.math.top_k(next_q_values * scores, k=slate_size)
    top_indices = tf.cast(top_indices, dtype=tf.int32)

    # Q values and scores of the chosen documents: [batch_size, slate_size].
    slate_q_values = tf.batch_gather(next_q_values, top_indices)
    slate_scores = tf.batch_gather(scores, top_indices)

    # Score-weighted average of the slate Q values, with the no-click score
    # added to the normaliser.
    weighted_q_sum = tf.reduce_sum(
        input_tensor=slate_q_values * slate_scores, axis=1)
    normaliser = tf.reduce_sum(
        input_tensor=slate_scores, axis=1) + score_no_click
    next_q_target_topk = weighted_q_sum / normaliser

    # Terminal steps contribute no bootstrapped value.
    not_terminal = 1. - tf.cast(terminals, tf.float32)
    return reward + gamma * next_q_target_topk * not_terminal
def true_fn(images, flow, mask):
    """Randomly rescales the second image and adapts flow and mask to match.

    `min_scale` and `max_scale` are not parameters; they are taken from the
    enclosing scope.

    Args:
      images: Tensor that unstacks into exactly two images along its first
        axis; height and width are read from axes -3 and -2.
      flow: Flow field to adjust, or `None` to skip flow and mask handling.
      mask: Mask multiplied into the adjusted flow; only used when `flow` is
        not `None`.

    Returns:
      A tuple `(images, flow, mask)` after the augmentation.
    """
    # Draw a random scale factor and the resolution it implies.
    image_shape = tf.shape(images)
    src_height = image_shape[-3]
    src_width = image_shape[-2]
    dst_height, dst_width, scale = _get_random_scaled_resolution(
        orig_height=src_height,
        orig_width=src_width,
        min_scale=min_scale,
        max_scale=max_scale,
        max_strech=0.0,
        probability_strech=0.0)

    # Only the second image is resized.
    first_image, second_image = tf.unstack(images)
    second_image = smurf_utils.resize(
        second_image, dst_height, dst_width, is_flow=False)
    # Crop whichever image is larger so that both have matching dimensions.
    if scale < 1.0:
        first_image = _center_crop(first_image, dst_height, dst_width)
    else:
        second_image = _center_crop(second_image, src_height, src_width)
    images = tf.stack([first_image, second_image])

    if flow is not None:
        # Pixel locations with the origin at the image center.
        positions = _positions_center_origin(src_height, src_width)

        # Per-axis scale factor of the resolution that was actually reached.
        height_ratio = tf.cast(dst_height, tf.float32) / tf.cast(
            src_height, tf.float32)
        width_ratio = tf.cast(dst_width, tf.float32) / tf.cast(
            src_width, tf.float32)
        flow_scale = tf.stack([height_ratio, width_ratio])

        # Scale the flow targets; the mask zeroes invalid flow locations.
        flow = ((positions + flow) * flow_scale - positions) * mask

        if scale < 1.0:
            # When downsampling, the reference image was cropped above, so
            # flow and mask are cropped to the same shape.
            flow = _center_crop(flow, dst_height, dst_width)
            mask = _center_crop(mask, dst_height, dst_width)
    return images, flow, mask