def pick_labeled_image(mesh_inputs, view_image_inputs, view_indices_2d_inputs,
                       view_name):
  """Picks the image with the most labeled points projecting to it."""
  if view_name not in view_image_inputs:
    return
  if view_name not in view_indices_2d_inputs:
    return
  if standard_fields.InputDataFields.point_loss_weights not in mesh_inputs:
    raise ValueError(
        'The key `point_loss_weights` is missing from mesh_inputs.')
  height = tf.shape(view_image_inputs[view_name])[1]
  width = tf.shape(view_image_inputs[view_name])[2]
  valid_points_y = tf.logical_and(
      tf.greater_equal(view_indices_2d_inputs[view_name][:, :, 0], 0),
      tf.less(view_indices_2d_inputs[view_name][:, :, 0], height))
  valid_points_x = tf.logical_and(
      tf.greater_equal(view_indices_2d_inputs[view_name][:, :, 1], 0),
      tf.less(view_indices_2d_inputs[view_name][:, :, 1], width))
  valid_points = tf.logical_and(valid_points_y, valid_points_x)
  image_total_weights = tf.reduce_sum(
      tf.cast(valid_points, dtype=tf.float32) * tf.squeeze(
          mesh_inputs[standard_fields.InputDataFields.point_loss_weights],
          axis=1),
      axis=1)
  # If no image received any weight, fall back to counting in-bounds points.
  image_total_weights = tf.cond(
      tf.equal(tf.reduce_sum(image_total_weights), 0),
      lambda: tf.reduce_sum(tf.cast(valid_points, dtype=tf.float32), axis=1),
      lambda: image_total_weights)
  best_image = tf.math.argmax(image_total_weights)
  view_image_inputs[view_name] = view_image_inputs[view_name][
      best_image:best_image + 1, :, :, :]
  view_indices_2d_inputs[view_name] = view_indices_2d_inputs[view_name][
      best_image:best_image + 1, :, :]
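
# A minimal usage sketch of `pick_labeled_image` (not part of the original
# API). The single 'view0' view and all tensor shapes are illustrative
# assumptions; in practice these dictionaries come from the input pipeline.
def _example_pick_labeled_image():
  num_images, num_points = 4, 100
  mesh_inputs = {
      standard_fields.InputDataFields.point_loss_weights:
          tf.random.uniform([num_points, 1])
  }
  view_image_inputs = {'view0': tf.zeros([num_images, 32, 32, 3])}
  view_indices_2d_inputs = {
      'view0':
          tf.random.uniform([num_images, num_points, 2],
                            minval=-10, maxval=40, dtype=tf.int32)
  }
  pick_labeled_image(mesh_inputs, view_image_inputs, view_indices_2d_inputs,
                     'view0')
  # view_image_inputs['view0'] now has shape [1, 32, 32, 3]: only the image
  # with the largest total weight of in-bounds points is kept.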
def randomly_crop_points(mesh_inputs,
                         view_indices_2d_inputs,
                         x_random_crop_size,
                         y_random_crop_size,
                         epsilon=1e-5):
  """Randomly crops points.

  Args:
    mesh_inputs: A dictionary containing input mesh (point) tensors.
    view_indices_2d_inputs: A dictionary containing input point to view
      correspondence tensors.
    x_random_crop_size: Size of the random crop in x dimension. If None,
      random crop will not take place on x dimension.
    y_random_crop_size: Size of the random crop in y dimension. If None,
      random crop will not take place on y dimension.
    epsilon: Epsilon (a very small value) used to add as a small margin to
      thresholds.
  """
  if x_random_crop_size is None and y_random_crop_size is None:
    return
  points = mesh_inputs[standard_fields.InputDataFields.point_positions]
  num_points = tf.shape(points)[0]
  # Pick a random point to center the crop window on.
  random_index = tf.random.uniform([],
                                   minval=0,
                                   maxval=num_points,
                                   dtype=tf.int32)
  center_x = points[random_index, 0]
  center_y = points[random_index, 1]
  points_x = points[:, 0]
  points_y = points[:, 1]
  min_x = tf.reduce_min(points_x) - epsilon
  max_x = tf.reduce_max(points_x) + epsilon
  min_y = tf.reduce_min(points_y) - epsilon
  max_y = tf.reduce_max(points_y) + epsilon
  if x_random_crop_size is not None:
    min_x = center_x - x_random_crop_size / 2.0 - epsilon
    max_x = center_x + x_random_crop_size / 2.0 + epsilon
  if y_random_crop_size is not None:
    min_y = center_y - y_random_crop_size / 2.0 - epsilon
    max_y = center_y + y_random_crop_size / 2.0 + epsilon
  x_mask = tf.logical_and(tf.greater(points_x, min_x),
                          tf.less(points_x, max_x))
  y_mask = tf.logical_and(tf.greater(points_y, min_y),
                          tf.less(points_y, max_y))
  points_mask = tf.logical_and(x_mask, y_mask)
  for key in sorted(mesh_inputs):
    mesh_inputs[key] = tf.boolean_mask(mesh_inputs[key], points_mask)
  for key in sorted(view_indices_2d_inputs):
    view_indices_2d_inputs[key] = tf.transpose(
        tf.boolean_mask(
            tf.transpose(view_indices_2d_inputs[key], [1, 0, 2]),
            points_mask), [1, 0, 2])
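
# A minimal usage sketch of `randomly_crop_points` (not part of the original
# API); shapes are illustrative assumptions. The dictionaries are modified in
# place, so the cropped tensors are read back from `mesh_inputs`.
def _example_randomly_crop_points():
  mesh_inputs = {
      standard_fields.InputDataFields.point_positions:
          tf.random.uniform([1000, 3], minval=-10.0, maxval=10.0)
  }
  view_indices_2d_inputs = {
      'view0': tf.zeros([2, 1000, 2], dtype=tf.int32)
  }
  randomly_crop_points(
      mesh_inputs,
      view_indices_2d_inputs,
      x_random_crop_size=4.0,
      y_random_crop_size=None)
  # Only points within +/- 2.0 of the sampled center along x survive; the
  # view correspondences are masked along the point dimension as well.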
def _pad_or_clip_voxels(voxel_features, voxel_indices, num_valid_voxels,
                        segment_ids, voxels_pad_or_clip_size):
  """Pads or clips voxels."""
  if voxels_pad_or_clip_size:
    num_valid_voxels = tf.minimum(num_valid_voxels, voxels_pad_or_clip_size)
    num_channels = voxel_features.get_shape().as_list()[-1]
    if len(voxel_features.shape.as_list()) == 2:
      output_shape = [voxels_pad_or_clip_size, num_channels]
    elif len(voxel_features.shape.as_list()) == 3:
      num_samples_per_voxel = voxel_features.get_shape().as_list()[1]
      if num_samples_per_voxel is None:
        num_samples_per_voxel = tf.shape(voxel_features)[1]
      output_shape = [
          voxels_pad_or_clip_size, num_samples_per_voxel, num_channels
      ]
    else:
      raise ValueError('voxel_features should be either rank 2 or 3.')
    voxel_features = shape_utils.pad_or_clip_nd(
        tensor=voxel_features, output_shape=output_shape)
    voxel_indices = shape_utils.pad_or_clip_nd(
        tensor=voxel_indices, output_shape=[voxels_pad_or_clip_size, 3])
    # Zero out segment ids that point at voxels beyond the clipped range.
    valid_segment_ids_mask = tf.cast(
        tf.less(segment_ids, num_valid_voxels), dtype=tf.int32)
    segment_ids *= valid_segment_ids_mask
  return voxel_features, voxel_indices, num_valid_voxels, segment_ids
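
# A minimal sketch of `_pad_or_clip_voxels` on toy tensors (not part of the
# original API); sizes are illustrative assumptions.
def _example_pad_or_clip_voxels():
  voxel_features = tf.random.uniform([10, 8])  # 10 voxels, 8 channels.
  voxel_indices = tf.zeros([10, 3], dtype=tf.int32)
  num_valid_voxels = tf.constant(10, dtype=tf.int32)
  segment_ids = tf.range(10)
  features, indices, num_valid, segment_ids = _pad_or_clip_voxels(
      voxel_features,
      voxel_indices,
      num_valid_voxels,
      segment_ids,
      voxels_pad_or_clip_size=16)
  # features is padded to [16, 8] and indices to [16, 3]; num_valid stays 10,
  # and segment ids pointing past the clip size would be zeroed out.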
def embedding_regularization_loss(inputs,
                                  outputs,
                                  lambda_coef=0.0001,
                                  regularization_type='unit_length',
                                  is_intermediate=False):
  """Regularization loss on the length of the embedding vectors.

  Args:
    inputs: A dictionary that contains
      num_valid_voxels - A tf.int32 tensor of size [batch_size].
      instance_ids - A tf.int32 tensor of size [batch_size, n].
    outputs: A dictionary that contains
      embeddings - A tf.float32 tensor of size [batch_size, n, f].
    lambda_coef: Regularization loss coefficient.
    regularization_type: Regularization loss type. Supported values are 'msq'
      and 'unit_length'. 'msq' stands for 'mean square' which penalizes the
      embedding vectors if they have a length far from zero. 'unit_length'
      penalizes the embedding vectors if they have a length far from one.
    is_intermediate: True if applied to intermediate predictions; otherwise,
      False.

  Returns:
    A tf.float32 scalar loss tensor.
  """
  instance_ids_key = standard_fields.InputDataFields.object_instance_id_voxels
  num_voxels_key = standard_fields.InputDataFields.num_valid_voxels
  if is_intermediate:
    embedding_key = (
        standard_fields.DetectionResultFields
        .intermediate_instance_embedding_voxels)
  else:
    embedding_key = (
        standard_fields.DetectionResultFields.instance_embedding_voxels)
  if instance_ids_key not in inputs:
    raise ValueError('instance_ids is missing in inputs.')
  if embedding_key not in outputs:
    raise ValueError('embedding is missing in outputs.')
  if num_voxels_key not in inputs:
    raise ValueError('num_voxels is missing in inputs.')
  batch_size = inputs[num_voxels_key].get_shape().as_list()[0]
  if batch_size is None:
    raise ValueError('batch_size is not defined at graph construction time.')
  num_valid_voxels = inputs[num_voxels_key]
  num_voxels = tf.shape(inputs[instance_ids_key])[1]
  # Mask out the padded voxels beyond num_valid_voxels in each example.
  valid_mask = tf.less(
      tf.tile(tf.expand_dims(tf.range(num_voxels), axis=0), [batch_size, 1]),
      tf.expand_dims(num_valid_voxels, axis=1))
  valid_mask = tf.reshape(valid_mask, [-1])
  embedding_dims = outputs[embedding_key].get_shape().as_list()[-1]
  if embedding_dims is None:
    raise ValueError(
        'Embedding dimension is unknown at graph construction time.')
  embedding = tf.reshape(outputs[embedding_key], [-1, embedding_dims])
  embedding = tf.boolean_mask(embedding, valid_mask)
  return metric_learning_losses.regularization_loss(
      embedding=embedding,
      lambda_coef=lambda_coef,
      regularization_type=regularization_type)
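
# A minimal sketch of calling `embedding_regularization_loss` (not part of
# the original API); batch size, voxel count, and embedding size are
# illustrative assumptions, and the call relies on the module's
# `standard_fields` and `metric_learning_losses` imports.
def _example_embedding_regularization_loss():
  batch_size, n, f = 2, 100, 64
  inputs = {
      standard_fields.InputDataFields.num_valid_voxels:
          tf.constant([90, 100], dtype=tf.int32),
      standard_fields.InputDataFields.object_instance_id_voxels:
          tf.zeros([batch_size, n], dtype=tf.int32),
  }
  outputs = {
      standard_fields.DetectionResultFields.instance_embedding_voxels:
          tf.random.normal([batch_size, n, f]),
  }
  loss = embedding_regularization_loss(
      inputs, outputs, lambda_coef=0.0001, regularization_type='unit_length')
  # `loss` is a scalar penalizing embeddings whose norm is far from one; only
  # the first 90 voxels of the first example contribute.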
def _box_classification_loss_unbatched(inputs_1, outputs_1, is_intermediate,
                                       is_balanced, mine_hard_negatives,
                                       hard_negative_score_threshold):
  """Loss function for input and outputs of batch size 1."""
  valid_mask = _get_voxels_valid_mask(inputs_1=inputs_1)
  if is_intermediate:
    logits = outputs_1[standard_fields.DetectionResultFields
                       .intermediate_object_semantic_voxels]
  else:
    logits = outputs_1[
        standard_fields.DetectionResultFields.object_semantic_voxels]
  num_classes = logits.get_shape().as_list()[-1]
  if num_classes is None:
    raise ValueError('Number of classes is unknown.')
  logits = tf.boolean_mask(tf.reshape(logits, [-1, num_classes]), valid_mask)
  labels = tf.boolean_mask(
      tf.reshape(
          inputs_1[standard_fields.InputDataFields.object_class_voxels],
          [-1, 1]), valid_mask)
  if mine_hard_negatives or is_balanced:
    instances = tf.boolean_mask(
        tf.reshape(
            inputs_1[
                standard_fields.InputDataFields.object_instance_id_voxels],
            [-1]), valid_mask)
  params = {}
  if mine_hard_negatives:
    # A hard negative is a background voxel (label 0) whose predicted
    # background score falls below the threshold. Duplicate those voxels,
    # under a fresh instance id, so they contribute twice to the loss.
    negative_scores = tf.reshape(tf.nn.softmax(logits)[:, 0], [-1])
    hard_negative_mask = tf.logical_and(
        tf.less(negative_scores, hard_negative_score_threshold),
        tf.equal(tf.reshape(labels, [-1]), 0))
    hard_negative_labels = tf.boolean_mask(labels, hard_negative_mask)
    hard_negative_logits = tf.boolean_mask(logits, hard_negative_mask)
    hard_negative_instances = tf.boolean_mask(
        tf.ones_like(instances) * (tf.reduce_max(instances) + 1),
        hard_negative_mask)
    logits = tf.concat([logits, hard_negative_logits], axis=0)
    instances = tf.concat([instances, hard_negative_instances], axis=0)
    labels = tf.concat([labels, hard_negative_labels], axis=0)
  if is_balanced:
    weights = loss_utils.get_balanced_loss_weights_multiclass(
        labels=tf.expand_dims(instances, axis=1))
    params['weights'] = weights
  return classification_loss_fn(logits=logits, labels=labels, **params)
def random_flip_left_right(images, flow, mask, probability):
  """Performs a random left/right flip."""
  perform_flip = tf.less(tf.random.uniform([]), probability)
  # apply flip
  images = tf.cond(pred=perform_flip,
                   true_fn=lambda: tf.reverse(images, axis=[-2]),
                   false_fn=lambda: images)
  if flow is not None:
    flow = tf.cond(pred=perform_flip,
                   true_fn=lambda: tf.reverse(flow, axis=[-2]),
                   false_fn=lambda: flow)
    mask = tf.cond(pred=perform_flip,
                   true_fn=lambda: tf.reverse(mask, axis=[-2]),
                   false_fn=lambda: mask)
    # correct sign of flow
    sign_correction = tf.reshape([1.0, -1.0], [1, 1, 2])
    flow = tf.cond(pred=perform_flip,
                   true_fn=lambda: flow * sign_correction,
                   false_fn=lambda: flow)
  return images, flow, mask
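
# A minimal usage sketch of `random_flip_left_right` (not part of the
# original API); shapes are illustrative assumptions. A horizontal flip
# negates the horizontal flow component, which in this representation is the
# second flow channel; that is what the sign correction implements.
def _example_random_flip_left_right():
  images = tf.random.uniform([2, 64, 64, 3])  # Stacked image pair.
  flow = tf.random.normal([64, 64, 2])
  mask = tf.ones([64, 64, 1])
  images, flow, mask = random_flip_left_right(
      images, flow, mask, probability=0.5)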
def random_flip_up_down(images, flow, mask, probability):
  """Performs a random up/down flip."""
  # Flip with the given probability.
  perform_flip = tf.less(tf.random.uniform([]), probability)
  # apply flip
  images = tf.cond(pred=perform_flip,
                   true_fn=lambda: tf.reverse(images, axis=[-3]),
                   false_fn=lambda: images)
  if flow is not None:
    flow = tf.cond(pred=perform_flip,
                   true_fn=lambda: tf.reverse(flow, axis=[-3]),
                   false_fn=lambda: flow)
    mask = tf.cond(pred=perform_flip,
                   true_fn=lambda: tf.reverse(mask, axis=[-3]),
                   false_fn=lambda: mask)
    # correct sign of flow
    sign_correction = tf.reshape([-1.0, 1.0], [1, 1, 2])
    flow = tf.cond(pred=perform_flip,
                   true_fn=lambda: flow * sign_correction,
                   false_fn=lambda: flow)
  return images, flow, mask
def random_eraser(images,
                  min_size,
                  max_size,
                  probability,
                  max_operations,
                  probability_additional_operations,
                  augment_entire_batch=False):
  """Erases random rectangle-shaped areas in the second image or image batch.

  Args:
    images: Stacked image pair that should be augmented with shape
      [2, height, width, 3] or a batch of images that should be augmented
      with shape [batch, height, width, 3].
    min_size: Minimum size of an erased rectangle.
    max_size: Maximum size of an erased rectangle.
    probability: Probability of applying this augmentation function.
    max_operations: Maximum total number of areas that should be erased.
    probability_additional_operations: Probability for each additional area
      to be erased if the augmentation is applied.
    augment_entire_batch: If True, the input is treated as a batch of images
      to which the augmentation is applied.

  Returns:
    Possibly augmented images.
  """
  perform_erase = tf.less(tf.random.uniform([]), probability)
  height = tf.shape(images)[-3]
  width = tf.shape(images)[-2]

  # Returns augmented images.
  def true_fn(images):
    if augment_entire_batch:
      image_2 = images
      mean_color = tf.reduce_mean(image_2, axis=[1, 2], keepdims=True)
    else:
      image_1, image_2 = tf.unstack(images)
      mean_color = tf.reduce_mean(image_2, axis=[0, 1], keepdims=True)

    def body(var_img, mean_color):
      # Sample a random rectangle and replace it with the mean color.
      x0 = tf.random.uniform([], 0, width, dtype=tf.int32)
      y0 = tf.random.uniform([], 0, height, dtype=tf.int32)
      dx = tf.random.uniform([], min_size, max_size, dtype=tf.int32)
      dy = tf.random.uniform([], min_size, max_size, dtype=tf.int32)
      x = tf.range(width)
      x_mask = (x0 <= x) & (x < x0 + dx)
      y = tf.range(height)
      y_mask = (y0 <= y) & (y < y0 + dy)
      mask = x_mask & y_mask[:, tf.newaxis]
      mask = tf.cast(mask[:, :, tf.newaxis], image_2.dtype)
      result = var_img * (1 - mask) + mean_color * mask
      return result

    # Perform at least one erase operation.
    image_2 = body(image_2, mean_color)
    # Perform additional erase operations.
    for _ in range(max_operations - 1):
      perform_erase = tf.less(
          tf.random.uniform([]), probability_additional_operations)
      image_2 = tf.cond(perform_erase,
                        lambda: body(image_2, mean_color),
                        lambda: image_2)
    if augment_entire_batch:
      images = image_2
    else:
      images = tf.stack([image_1, image_2])
    return images

  # Returns unaugmented images.
  def false_fn(images):
    return images

  return tf.cond(perform_erase,
                 lambda: true_fn(images),
                 lambda: false_fn(images))
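
# A minimal usage sketch of `random_eraser` (not part of the original API);
# image sizes and parameters are illustrative assumptions.
def _example_random_eraser():
  image_pair = tf.random.uniform([2, 128, 128, 3])
  augmented = random_eraser(
      image_pair,
      min_size=10,
      max_size=40,
      probability=0.5,
      max_operations=3,
      probability_additional_operations=0.5)
  # With probability 0.5, between 1 and 3 rectangles in the second image are
  # replaced by that image's mean color; the first image is left unchanged.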
def classification_loss_using_mask_iou(inputs,
                                       outputs,
                                       num_samples,
                                       max_instance_id=None,
                                       similarity_strategy='distance',
                                       is_balanced=True,
                                       is_intermediate=False):
  """Classification loss with an iou threshold.

  Args:
    inputs: A dictionary that contains
      num_valid_voxels - A tf.int32 tensor of size [batch_size].
      instance_ids - A tf.int32 tensor of size [batch_size, n].
      class_labels - A tf.int32 tensor of size [batch_size, n]. It is assumed
        that the background voxels are assigned to class 0.
    outputs: A dictionary that contains
      embeddings - A tf.float32 tensor of size [batch_size, n, f].
      logits - A tf.float32 tensor of size [batch_size, n, num_classes]. It
        is assumed that background is class 0.
    num_samples: An int determining the number of samples.
    max_instance_id: If set, instance ids larger than that value will be
      ignored. If not set, it will be computed from instance_ids tensor.
    similarity_strategy: Defines the method for computing similarity between
      embedding vectors. Possible values are 'dotproduct' and 'distance'.
    is_balanced: If True, the per-voxel losses are re-weighted to have equal
      total weight for foreground vs. background voxels.
    is_intermediate: True if applied to intermediate predictions; otherwise,
      False.

  Returns:
    A tf.float32 scalar loss tensor.
  """
  instance_ids_key = standard_fields.InputDataFields.object_instance_id_voxels
  class_labels_key = standard_fields.InputDataFields.object_class_voxels
  num_voxels_key = standard_fields.InputDataFields.num_valid_voxels
  if is_intermediate:
    embedding_key = (standard_fields.DetectionResultFields
                     .intermediate_instance_embedding_voxels)
    logits_key = (standard_fields.DetectionResultFields
                  .intermediate_object_semantic_voxels)
  else:
    embedding_key = (
        standard_fields.DetectionResultFields.instance_embedding_voxels)
    logits_key = standard_fields.DetectionResultFields.object_semantic_voxels
  if instance_ids_key not in inputs:
    raise ValueError('instance_ids is missing in inputs.')
  if class_labels_key not in inputs:
    raise ValueError('class_labels is missing in inputs.')
  if num_voxels_key not in inputs:
    raise ValueError('num_voxels is missing in inputs.')
  if embedding_key not in outputs:
    raise ValueError('embedding is missing in outputs.')
  if logits_key not in outputs:
    raise ValueError('logits is missing in outputs.')
  batch_size = inputs[num_voxels_key].get_shape().as_list()[0]
  if batch_size is None:
    raise ValueError('batch_size is not defined at graph construction time.')
  num_valid_voxels = inputs[num_voxels_key]
  num_voxels = tf.shape(inputs[instance_ids_key])[1]
  valid_mask = tf.less(
      tf.tile(tf.expand_dims(tf.range(num_voxels), axis=0), [batch_size, 1]),
      tf.expand_dims(num_valid_voxels, axis=1))
  return classification_loss_using_mask_iou_func(
      embeddings=outputs[embedding_key],
      logits=outputs[logits_key],
      instance_ids=tf.reshape(inputs[instance_ids_key], [batch_size, -1]),
      class_labels=inputs[class_labels_key],
      num_samples=num_samples,
      valid_mask=valid_mask,
      max_instance_id=max_instance_id,
      similarity_strategy=similarity_strategy,
      is_balanced=is_balanced)
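
# A minimal sketch of calling `classification_loss_using_mask_iou` (not part
# of the original API); batch size, voxel count, embedding size, and class
# count are illustrative assumptions, and the call relies on the module's
# `classification_loss_using_mask_iou_func` helper.
def _example_classification_loss_using_mask_iou():
  batch_size, n, f, num_classes = 2, 100, 64, 5
  inputs = {
      standard_fields.InputDataFields.num_valid_voxels:
          tf.constant([100, 80], dtype=tf.int32),
      standard_fields.InputDataFields.object_instance_id_voxels:
          tf.random.uniform([batch_size, n], maxval=10, dtype=tf.int32),
      standard_fields.InputDataFields.object_class_voxels:
          tf.random.uniform([batch_size, n], maxval=num_classes,
                            dtype=tf.int32),
  }
  outputs = {
      standard_fields.DetectionResultFields.instance_embedding_voxels:
          tf.random.normal([batch_size, n, f]),
      standard_fields.DetectionResultFields.object_semantic_voxels:
          tf.random.normal([batch_size, n, num_classes]),
  }
  loss = classification_loss_using_mask_iou(
      inputs, outputs, num_samples=10, max_instance_id=10)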
def npair_loss(inputs,
               outputs,
               num_samples,
               max_instance_id=None,
               similarity_strategy='distance',
               loss_strategy='softmax',
               is_intermediate=False):
  """N-pair metric learning loss for learning feature embeddings.

  Args:
    inputs: A dictionary that contains
      instance_ids - A tf.int32 tensor of size [batch_size, n].
      valid_mask - A tf.bool tensor of size [batch_size, n] that is True when
        an element is valid and False if it needs to be ignored. By default
        the value is None which means it is not applied.
    outputs: A dictionary that contains
      embeddings - A tf.float32 tensor of size [batch_size, n, f].
    num_samples: An int determining the number of samples.
    max_instance_id: If set, instance ids larger than that value will be
      ignored. If not set, it will be computed from instance_ids tensor.
    similarity_strategy: Defines the method for computing similarity between
      embedding vectors. Possible values are 'dotproduct' and 'distance'.
    loss_strategy: Defines the type of loss including 'softmax' or 'sigmoid'.
    is_intermediate: True if applied to intermediate predictions; otherwise,
      False.

  Returns:
    A tf.float32 scalar loss tensor.
  """
  instance_ids_key = standard_fields.InputDataFields.object_instance_id_voxels
  num_voxels_key = standard_fields.InputDataFields.num_valid_voxels
  if is_intermediate:
    embedding_key = (
        standard_fields.DetectionResultFields
        .intermediate_instance_embedding_voxels)
  else:
    embedding_key = (
        standard_fields.DetectionResultFields.instance_embedding_voxels)
  if instance_ids_key not in inputs:
    raise ValueError('object_instance_id_voxels is missing in inputs.')
  if num_voxels_key not in inputs:
    raise ValueError('num_voxels is missing in inputs.')
  if embedding_key not in outputs:
    raise ValueError('embedding key is missing in outputs.')
  batch_size = inputs[num_voxels_key].get_shape().as_list()[0]
  if batch_size is None:
    raise ValueError('batch_size is not defined at graph construction time.')
  num_valid_voxels = inputs[num_voxels_key]
  num_voxels = tf.shape(inputs[instance_ids_key])[1]
  valid_mask = tf.less(
      tf.tile(tf.expand_dims(tf.range(num_voxels), axis=0), [batch_size, 1]),
      tf.expand_dims(num_valid_voxels, axis=1))
  return npair_loss_func(
      embeddings=outputs[embedding_key],
      instance_ids=tf.reshape(inputs[instance_ids_key], [batch_size, -1]),
      num_samples=num_samples,
      valid_mask=valid_mask,
      max_instance_id=max_instance_id,
      similarity_strategy=similarity_strategy,
      loss_strategy=loss_strategy)
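
# A minimal sketch of calling `npair_loss`, mirroring the example above (not
# part of the original API); shapes are illustrative assumptions, and the
# call relies on the module's `npair_loss_func` helper.
def _example_npair_loss():
  batch_size, n, f = 2, 100, 64
  inputs = {
      standard_fields.InputDataFields.num_valid_voxels:
          tf.constant([100, 80], dtype=tf.int32),
      standard_fields.InputDataFields.object_instance_id_voxels:
          tf.random.uniform([batch_size, n], maxval=10, dtype=tf.int32),
  }
  outputs = {
      standard_fields.DetectionResultFields.instance_embedding_voxels:
          tf.random.normal([batch_size, n, f]),
  }
  loss = npair_loss(inputs, outputs, num_samples=10, max_instance_id=10)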
def _safe_div(a, b):
  """Divides two numbers, returns 0 if denominator is (close to) 0."""
  return tf.where(tf.less(tf.abs(b), 1e-10), 0.0, a / b)
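
# A quick sanity check of `_safe_div` (illustrative, not part of the original
# API): a near-zero denominator yields 0 instead of inf.
def _example_safe_div():
  assert float(_safe_div(tf.constant(1.0), tf.constant(0.0))) == 0.0
  assert float(_safe_div(tf.constant(6.0), tf.constant(3.0))) == 2.0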
def prepare_waymo_open_dataset(inputs,
                               valid_object_classes=None,
                               max_object_distance_from_source=74.88):
  """Maps the fields from loaded input to standard fields.

  Args:
    inputs: A dictionary of input tensors.
    valid_object_classes: List of valid object classes. If None, it is
      ignored.
    max_object_distance_from_source: Maximum distance of objects from source.
      It will be ignored if None.

  Returns:
    A dictionary of input tensors with standard field names.
  """
  prepared_inputs = {}
  if standard_fields.InputDataFields.point_positions in inputs:
    prepared_inputs[standard_fields.InputDataFields.point_positions] = inputs[
        standard_fields.InputDataFields.point_positions]
  if standard_fields.InputDataFields.point_intensities in inputs:
    prepared_inputs[
        standard_fields.InputDataFields.point_intensities] = inputs[
            standard_fields.InputDataFields.point_intensities]
  if standard_fields.InputDataFields.point_elongations in inputs:
    prepared_inputs[
        standard_fields.InputDataFields.point_elongations] = inputs[
            standard_fields.InputDataFields.point_elongations]
  if standard_fields.InputDataFields.point_normals in inputs:
    prepared_inputs[standard_fields.InputDataFields.point_normals] = inputs[
        standard_fields.InputDataFields.point_normals]
  if 'cameras/front/intrinsics/K' in inputs:
    prepared_inputs[standard_fields.InputDataFields
                    .camera_intrinsics] = inputs['cameras/front/intrinsics/K']
  if 'cameras/front/extrinsics/R' in inputs:
    prepared_inputs[
        standard_fields.InputDataFields
        .camera_rotation_matrix] = inputs['cameras/front/extrinsics/R']
  if 'cameras/front/extrinsics/t' in inputs:
    prepared_inputs[
        standard_fields.InputDataFields
        .camera_translation] = inputs['cameras/front/extrinsics/t']
  if 'cameras/front/image' in inputs:
    prepared_inputs[standard_fields.InputDataFields
                    .camera_image] = inputs['cameras/front/image']
    prepared_inputs[standard_fields.InputDataFields
                    .camera_raw_image] = inputs['cameras/front/image']
    prepared_inputs[standard_fields.InputDataFields
                    .camera_original_image] = inputs['cameras/front/image']
  if 'scene_name' in inputs and 'frame_name' in inputs:
    prepared_inputs[
        standard_fields.InputDataFields.camera_image_name] = tf.strings.join(
            [inputs['scene_name'], inputs['frame_name']], separator='_')
  if 'objects/pose/R' in inputs:
    prepared_inputs[standard_fields.InputDataFields
                    .objects_rotation_matrix] = inputs['objects/pose/R']
  if 'objects/pose/t' in inputs:
    prepared_inputs[standard_fields.InputDataFields
                    .objects_center] = inputs['objects/pose/t']
  if 'objects/shape/dimension' in inputs:
    prepared_inputs[
        standard_fields.InputDataFields.objects_length] = tf.reshape(
            inputs['objects/shape/dimension'][:, 0], [-1, 1])
    prepared_inputs[
        standard_fields.InputDataFields.objects_width] = tf.reshape(
            inputs['objects/shape/dimension'][:, 1], [-1, 1])
    prepared_inputs[
        standard_fields.InputDataFields.objects_height] = tf.reshape(
            inputs['objects/shape/dimension'][:, 2], [-1, 1])
  if 'objects/category/label' in inputs:
    prepared_inputs[
        standard_fields.InputDataFields.objects_class] = tf.reshape(
            inputs['objects/category/label'], [-1, 1])
  if valid_object_classes is not None:
    valid_objects_mask = tf.cast(
        tf.zeros_like(
            prepared_inputs[standard_fields.InputDataFields.objects_class],
            dtype=tf.int32),
        dtype=tf.bool)
    for object_class in valid_object_classes:
      valid_objects_mask = tf.logical_or(
          valid_objects_mask,
          tf.equal(
              prepared_inputs[standard_fields.InputDataFields.objects_class],
              object_class))
    valid_objects_mask = tf.reshape(valid_objects_mask, [-1])
    for key in standard_fields.get_input_object_fields():
      if key in prepared_inputs:
        prepared_inputs[key] = tf.boolean_mask(prepared_inputs[key],
                                               valid_objects_mask)
  if max_object_distance_from_source is not None:
    if standard_fields.InputDataFields.objects_center in prepared_inputs:
      object_distances = tf.norm(
          prepared_inputs[
              standard_fields.InputDataFields.objects_center][:, 0:2],
          axis=1)
      valid_mask = tf.less(object_distances, max_object_distance_from_source)
      for key in standard_fields.get_input_object_fields():
        if key in prepared_inputs:
          prepared_inputs[key] = tf.boolean_mask(prepared_inputs[key],
                                                 valid_mask)
  return prepared_inputs
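
# A minimal sketch of `prepare_waymo_open_dataset` on a toy inputs dictionary
# (not part of the original API); the raw key names follow the ones handled
# above, and the tensor values are illustrative assumptions.
def _example_prepare_waymo_open_dataset():
  inputs = {
      standard_fields.InputDataFields.point_positions:
          tf.random.uniform([1000, 3], minval=-80.0, maxval=80.0),
      'objects/pose/t': tf.constant([[10.0, 0.0, 1.0], [90.0, 0.0, 1.0]]),
      'objects/category/label': tf.constant([1, 2]),
  }
  prepared = prepare_waymo_open_dataset(inputs, valid_object_classes=[1, 2])
  # Standard field names are populated, and the object 90m from the source is
  # dropped by the default 74.88m distance filter.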
def _sample(a, ou):
  # With probability `epsilon_greedy` (a free variable resolved from the
  # enclosing scope), add Ornstein-Uhlenbeck exploration noise `ou()` to the
  # action; otherwise return the action unchanged.
  return tf.cond(
      tf.less(tf.random.uniform((), 0, 1), epsilon_greedy),
      lambda: a + ou(),
      lambda: a)
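
# A minimal sketch of `_sample` (illustrative, not part of the original API).
# It assumes `epsilon_greedy` is defined in the enclosing scope and `ou` is a
# callable returning noise shaped like the action:
#
#   epsilon_greedy = 0.1
#   action = tf.constant([0.3, -0.2])
#   noisy_action = _sample(action, lambda: tf.random.normal([2], stddev=0.1))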