def prediction_loss(self, truths, palette):
  def spatial_loss(truth_features, predicted_features, space_desc):
    feature_losses = []
    for truth, prediction, spec in zip(truth_features, predicted_features,
                                       space_desc.features):
      if spec.type == FeatureType.CATEGORICAL:
        truth = tf.transpose(truth, (0, 2, 3, 1))
        prediction = tf.transpose(prediction, (0, 2, 3, 1))
        feature_losses.append(
            tf.losses.softmax_cross_entropy(truth, prediction))
        summary_image = tf.argmax(tf.concat([truth, prediction], 2), 3)
        summary_image = tf.gather(
            palette[space_desc.index][spec.index], summary_image)
        tf.summary.image(spec.name, summary_image)
      else:
        feature_losses.append(
            tf.losses.mean_squared_error(truth, prediction))
        summary_image = tf.concat([truth, prediction], 3)
        tf.summary.image(spec.name,
                         tf.transpose(summary_image, (0, 2, 3, 1)))
      tf.summary.scalar(spec.name, feature_losses[-1])
    return tf.reduce_mean(tf.stack(feature_losses))

  with tf.name_scope('prediction_loss'):
    spatial_losses = []
    for s in self.env_spec.spaces:
      with tf.name_scope(s.name):
        loss = spatial_loss(truths[s.index], self.out_pred[s.index], s)
        spatial_losses.append(loss)
        tf.summary.scalar('loss', loss)
    loss = tf.reduce_mean(tf.stack(spatial_losses))
    tf.summary.scalar('loss', loss)
  return loss
def compute_target_greedy_q(reward, gamma, next_actions, next_q_values,
                            next_states, terminals):
  """Computes the optimal target Q value with the adaptive greedy algorithm.

  This algorithm corresponds to the method "GT" in
  Ie et al. https://arxiv.org/abs/1905.12767.

  Args:
    reward: [batch_size] tensor, the immediate reward.
    gamma: float, discount factor with the usual RL meaning.
    next_actions: [batch_size, slate_size] tensor, the next slate.
    next_q_values: [batch_size, num_of_documents] tensor, the q values of the
      documents in the next step.
    next_states: [batch_size, 1 + num_of_documents] tensor, the features for
      the user and the documents in the next step.
    terminals: [batch_size] tensor, indicating if this is a terminal step.

  Returns:
    [batch_size] tensor, the target q values.
  """
  slate_size = next_actions.get_shape().as_list()[1]
  stack_number = -1
  user_obs = next_states[:, 0, :, stack_number]
  doc_obs = next_states[:, 1:, :, stack_number]
  batch_size = next_q_values.get_shape().as_list()[0]

  next_greedy_q_list = []
  for i in range(batch_size):
    s, s_no_click = score_documents_tf(user_obs[i], doc_obs[i])
    q = next_q_values[i]
    slate = select_slate_greedy(slate_size, s_no_click, s, q)
    p_selected = compute_probs_tf(slate, s, s_no_click)
    q_selected = tf.gather(q, slate)
    next_greedy_q_list.append(
        tf.reduce_sum(input_tensor=p_selected * q_selected))
  next_greedy_q_values = tf.stack(next_greedy_q_list)

  return reward + gamma * next_greedy_q_values * (
      1. - tf.cast(terminals, tf.float32))
def select_slate_optimal(slate_size, s_no_click, s, q):
  """Selects the slate using exhaustive search.

  This algorithm corresponds to the method "OS" in
  Ie et al. https://arxiv.org/abs/1905.12767.

  Args:
    slate_size: int, the size of the recommendation slate.
    s_no_click: float tensor, the score for not clicking any document.
    s: [num_of_documents] tensor, the scores for clicking documents.
    q: [num_of_documents] tensor, the predicted q values for documents.

  Returns:
    [slate_size] tensor, the selected slate.
  """
  num_candidates = s.shape.as_list()[0]

  # Obtain all possible slates given current docs in the candidate set.
  mesh_args = [list(range(num_candidates))] * slate_size
  slates = tf.stack(tf.meshgrid(*mesh_args), axis=-1)
  slates = tf.reshape(slates, shape=(-1, slate_size))

  # Filter slates that include duplicates to ensure each document is picked
  # at most once.
  unique_mask = tf.map_fn(
      lambda x: tf.equal(tf.size(input=x), tf.size(input=tf.unique(x)[0])),
      slates,
      dtype=tf.bool)
  slates = tf.boolean_mask(tensor=slates, mask=unique_mask)

  slate_q_values = tf.gather(s * q, slates)
  slate_scores = tf.gather(s, slates)
  slate_normalizer = tf.reduce_sum(
      input_tensor=slate_scores, axis=1) + s_no_click

  slate_q_values = slate_q_values / tf.expand_dims(slate_normalizer, 1)
  slate_sum_q_values = tf.reduce_sum(input_tensor=slate_q_values, axis=1)
  max_q_slate_index = tf.argmax(input=slate_sum_q_values)
  return tf.gather(slates, max_q_slate_index, axis=0)
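# A minimal usage sketch for select_slate_optimal, assuming TF2 eager
# execution; the document scores, q values, and slate size below are made-up
# values for illustration only.
import tensorflow as tf

s = tf.constant([0.2, 0.5, 0.1, 0.4])  # per-document click scores (assumed)
q = tf.constant([1.0, 0.3, 2.0, 0.8])  # per-document q values (assumed)
s_no_click = tf.constant(0.6)          # score of the no-click option (assumed)
slate = select_slate_optimal(slate_size=2, s_no_click=s_no_click, s=s, q=q)
print(slate.numpy())  # indices of the two documents with the best slate value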
def rotate_points_around_axis(points, rotation_angle, axis=2):
  """Rotates points around axis.

  Args:
    points: A tf.float32 tensor of size [N, 3] containing points.
    rotation_angle: A float value containing the rotation angle in radians.
    axis: A value in [0, 1, 2] for rotating around x, y, z axis.

  Returns:
    rotated_points: A tf.float32 tensor of size [N, 3] containing points.
  """
  if axis not in [0, 1, 2]:
    raise ValueError('axis is out of bound: %d' % axis)
  c = tf.cos(rotation_angle)
  s = tf.sin(rotation_angle)
  new_points = [points[:, 0], points[:, 1], points[:, 2]]
  other_axis = list(set([0, 1, 2]) - set([axis]))
  new_points[other_axis[0]] = (points[:, other_axis[0]] * c -
                               points[:, other_axis[1]] * s)
  new_points[other_axis[1]] = (points[:, other_axis[0]] * s +
                               points[:, other_axis[1]] * c)
  return tf.stack(new_points, axis=1)
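# A minimal usage sketch for rotate_points_around_axis with an assumed input:
# rotating (1, 0, 0) by 90 degrees around the z axis should give roughly
# (0, 1, 0).
import math
import tensorflow as tf

points = tf.constant([[1.0, 0.0, 0.0]])
rotated = rotate_points_around_axis(points, rotation_angle=math.pi / 2, axis=2)
print(rotated.numpy())  # approximately [[0., 1., 0.]]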
def compute_target_sarsa(reward, gamma, next_actions, next_q_values,
                         next_states, terminals):
  """Computes the SARSA target Q value.

  Args:
    reward: [batch_size] tensor, the immediate reward.
    gamma: float, discount factor with the usual RL meaning.
    next_actions: [batch_size, slate_size] tensor, the next slate.
    next_q_values: [batch_size, num_of_documents] tensor, the q values of the
      documents in the next step.
    next_states: [batch_size, 1 + num_of_documents] tensor, the features for
      the user and the documents in the next step.
    terminals: [batch_size] tensor, indicating if this is a terminal step.

  Returns:
    [batch_size] tensor, the target q values.
  """
  stack_number = -1
  user_obs = next_states[:, 0, :, stack_number]
  doc_obs = next_states[:, 1:, :, stack_number]
  batch_size = next_q_values.get_shape().as_list()[0]

  next_sarsa_q_list = []
  for i in range(batch_size):
    s, s_no_click = score_documents_tf(user_obs[i], doc_obs[i])
    q = next_q_values[i]
    slate = tf.expand_dims(next_actions[i], 1)
    p_selected = compute_probs_tf(slate, s, s_no_click)
    q_selected = tf.gather(q, slate)
    next_sarsa_q_list.append(
        tf.reduce_sum(input_tensor=p_selected * q_selected))
  next_sarsa_q_values = tf.stack(next_sarsa_q_list)

  return reward + gamma * next_sarsa_q_values * (
      1. - tf.cast(terminals, tf.float32))
def true_fn(images):
  if augment_entire_batch:
    image_2 = images
    mean_color = tf.reduce_mean(image_2, axis=[1, 2], keepdims=True)
    print(mean_color.shape)
  else:
    image_1, image_2 = tf.unstack(images)
    mean_color = tf.reduce_mean(image_2, axis=[0, 1], keepdims=True)

  def body(var_img, mean_color):
    x0 = tf.random.uniform([], 0, width, dtype=tf.int32)
    y0 = tf.random.uniform([], 0, height, dtype=tf.int32)
    dx = tf.random.uniform([], min_size, max_size, dtype=tf.int32)
    dy = tf.random.uniform([], min_size, max_size, dtype=tf.int32)
    x = tf.range(width)
    x_mask = (x0 <= x) & (x < x0 + dx)
    y = tf.range(height)
    y_mask = (y0 <= y) & (y < y0 + dy)
    mask = x_mask & y_mask[:, tf.newaxis]
    mask = tf.cast(mask[:, :, tf.newaxis], image_2.dtype)
    result = var_img * (1 - mask) + mean_color * mask
    return result

  # Perform at least one erase operation.
  image_2 = body(image_2, mean_color)
  # Perform additional erase operations.
  for _ in range(max_operations - 1):
    perform_erase = tf.less(
        tf.random.uniform([]), probability_additional_operations)
    image_2 = tf.cond(perform_erase,
                      lambda: body(image_2, mean_color),
                      lambda: image_2)
  if augment_entire_batch:
    images = image_2
  else:
    images = tf.stack([image_1, image_2])
  return images
def true_fn(images):
  image_1, image_2 = tf.unstack(images)
  image_1 = tf.image.random_brightness(image_1, max_delta)
  image_2 = tf.image.random_brightness(image_2, max_delta)
  return tf.stack([image_1, image_2])
def true_fn(images):
  image_1, image_2 = tf.unstack(images)
  image_1 = tf.image.random_contrast(image_1, min_bound, max_bound)
  image_2 = tf.image.random_contrast(image_2, min_bound, max_bound)
  return tf.stack([image_1, image_2])
def true_fn(images, flow, mask):
  angle_radian = tf.random.uniform(
      [], minval=-max_rotation, maxval=max_rotation,
      dtype=tf.float32) * math.pi / 180.0

  image_1, image_2 = tf.unstack(images)
  image_2 = rotate(image_2, angle_radian, is_flow=False, mask=None)
  images = tf.stack([image_1, image_2])

  if not_empty_crop:
    orig_height = tf.shape(images)[-3]
    orig_width = tf.shape(images)[-2]
    # introduce abbreviations for shorter notation
    cos = tf.math.cos(angle_radian % math.pi)
    sin = tf.math.sin(angle_radian % math.pi)
    h = tf.cast(orig_height, tf.float32)
    w = tf.cast(orig_width, tf.float32)
    # compute required scale factor
    scale = tf.cond(
        tf.math.less(angle_radian % math.pi, math.pi / 2.0),
        lambda: tf.math.maximum((w / h) * sin + cos, (h / w) * sin + cos),
        lambda: tf.math.maximum((w / h) * sin - cos, (h / w) * sin - cos))
    new_height = tf.math.floor(h / scale)
    new_width = tf.math.floor(w / scale)

    # crop image again to original size
    offset_height = tf.cast((h - new_height) / 2, tf.int32)
    offset_width = tf.cast((w - new_width) / 2, tf.int32)
    images = tf.image.crop_to_bounding_box(
        images,
        offset_height=offset_height,
        offset_width=offset_width,
        target_height=tf.cast(new_height, tf.int32),
        target_width=tf.cast(new_width, tf.int32))

  if flow is not None:
    # get current locations (with the origin in the image center)
    positions = _positions_center_origin(orig_height, orig_width)

    # compute augmented flow (multiply by mask to zero invalid flow locations)
    cos = tf.math.cos(angle_radian)
    sin = tf.math.sin(angle_radian)
    rotation_matrix = tf.reshape([cos, sin, -sin, cos], [2, 2])
    flow = (tf.linalg.matmul(
        (positions + flow), rotation_matrix) - positions) * mask

    if not_empty_crop:
      # crop flow and mask again to original size
      flow = tf.image.crop_to_bounding_box(
          flow,
          offset_height=offset_height,
          offset_width=offset_width,
          target_height=tf.cast(new_height, tf.int32),
          target_width=tf.cast(new_width, tf.int32))
      mask = tf.image.crop_to_bounding_box(
          mask,
          offset_height=offset_height,
          offset_width=offset_width,
          target_height=tf.cast(new_height, tf.int32),
          target_width=tf.cast(new_width, tf.int32))
  return images, flow, mask
def random_crop(images, flow, mask, crop_height, crop_width, relative_offset,
                probability_crop_offset):
  """Performs a random crop with the given height and width."""
  # early return if crop_height or crop_width is not specified
  if crop_height is None or crop_width is None:
    return images, flow, mask

  orig_height = tf.shape(images)[-3]
  orig_width = tf.shape(images)[-2]

  # check if crop size fits the image size
  scale = 1.0
  ratio = tf.cast(crop_height, tf.float32) / tf.cast(orig_height, tf.float32)
  scale = tf.math.maximum(scale, ratio)
  ratio = tf.cast(crop_width, tf.float32) / tf.cast(orig_width, tf.float32)
  scale = tf.math.maximum(scale, ratio)
  # compute minimum required height
  new_height = tf.cast(
      tf.math.ceil(tf.cast(orig_height, tf.float32) * scale), tf.int32)
  new_width = tf.cast(
      tf.math.ceil(tf.cast(orig_width, tf.float32) * scale), tf.int32)
  # perform resize (scales with 1 if not required)
  images = smurf_utils.resize(images, new_height, new_width, is_flow=False)

  # compute joint offset
  max_offset_h = new_height - tf.cast(crop_height, dtype=tf.int32)
  max_offset_w = new_width - tf.cast(crop_width, dtype=tf.int32)
  joint_offset_h = tf.random.uniform([], maxval=max_offset_h + 1,
                                     dtype=tf.int32)
  joint_offset_w = tf.random.uniform([], maxval=max_offset_w + 1,
                                     dtype=tf.int32)

  # compute relative offset
  min_relative_offset_h = tf.math.maximum(joint_offset_h - relative_offset, 0)
  max_relative_offset_h = tf.math.minimum(
      joint_offset_h + relative_offset, max_offset_h)
  min_relative_offset_w = tf.math.maximum(joint_offset_w - relative_offset, 0)
  max_relative_offset_w = tf.math.minimum(
      joint_offset_w + relative_offset, max_offset_w)
  relative_offset_h = tf.random.uniform(
      [], minval=min_relative_offset_h, maxval=max_relative_offset_h + 1,
      dtype=tf.int32)
  relative_offset_w = tf.random.uniform(
      [], minval=min_relative_offset_w, maxval=max_relative_offset_w + 1,
      dtype=tf.int32)
  set_crop_offset = tf.random.uniform([]) < probability_crop_offset
  relative_offset_h = tf.cond(
      set_crop_offset, lambda: relative_offset_h, lambda: joint_offset_h)
  relative_offset_w = tf.cond(
      set_crop_offset, lambda: relative_offset_w, lambda: joint_offset_w)

  # crop both images
  image_1, image_2 = tf.unstack(images)
  image_1 = tf.image.crop_to_bounding_box(
      image_1, offset_height=joint_offset_h, offset_width=joint_offset_w,
      target_height=crop_height, target_width=crop_width)
  image_2 = tf.image.crop_to_bounding_box(
      image_2, offset_height=relative_offset_h,
      offset_width=relative_offset_w, target_height=crop_height,
      target_width=crop_width)
  images = tf.stack([image_1, image_2])

  if flow is not None:
    # perform resize (scales with 1 if not required)
    flow, mask = smurf_utils.resize(
        flow, new_height, new_width, is_flow=True, mask=mask)

    # crop flow and mask
    flow = tf.image.crop_to_bounding_box(
        flow, offset_height=joint_offset_h, offset_width=joint_offset_w,
        target_height=crop_height, target_width=crop_width)
    mask = tf.image.crop_to_bounding_box(
        mask, offset_height=joint_offset_h, offset_width=joint_offset_w,
        target_height=crop_height, target_width=crop_width)

    # correct flow for relative shift (/crop)
    flow_delta = tf.stack(
        [tf.cast(relative_offset_h - joint_offset_h, tf.float32),
         tf.cast(relative_offset_w - joint_offset_w, tf.float32)])
    flow = (flow - flow_delta) * mask
  return images, flow, mask, joint_offset_h, joint_offset_w
def false_fn(scale_height, scale_width):
  return tf.stack((scale_height, scale_width), axis=0)
def true_fn(scale_height, scale_width):
  scale_height *= 2 ** tf.random.uniform([], -max_strech, max_strech)
  scale_width *= 2 ** tf.random.uniform([], -max_strech, max_strech)
  return tf.stack((scale_height, scale_width), axis=0)
def true_fn(images):
  image_1, image_2 = tf.unstack(images)
  image_1 = potential_asymmetric_augmentations(image_1)
  image_2 = potential_asymmetric_augmentations(image_2)
  return tf.stack([image_1, image_2])
def photometric_augmentation(images,
                             augment_color_swap=True,
                             augment_hue_shift=True,
                             augment_saturation=False,
                             augment_brightness=False,
                             augment_contrast=False,
                             augment_gaussian_noise=False,
                             augment_brightness_individual=False,
                             augment_contrast_individual=False,
                             max_delta_hue=0.5,
                             min_bound_saturation=0.8,
                             max_bound_saturation=1.2,
                             max_delta_brightness=0.1,
                             min_bound_contrast=0.8,
                             max_bound_contrast=1.2,
                             min_bound_gaussian_noise=0.0,
                             max_bound_gaussian_noise=0.02,
                             max_delta_brightness_individual=0.02,
                             min_bound_contrast_individual=0.95,
                             max_bound_contrast_individual=1.05):
  """Applies photometric augmentations to an image pair."""
  # Randomly permute colors by rolling and reversing.
  # This covers all permutations.
  if augment_color_swap:
    r = tf.random.uniform([], maxval=3, dtype=tf.int32)
    images = tf.roll(images, r, axis=-1)
    r = tf.equal(tf.random.uniform([], maxval=2, dtype=tf.int32), 1)
    images = tf.cond(pred=r,
                     true_fn=lambda: tf.reverse(images, axis=[-1]),
                     false_fn=lambda: images)

  if augment_hue_shift:
    images = tf.image.random_hue(images, max_delta_hue)

  if augment_saturation:
    images = tf.image.random_saturation(
        images, min_bound_saturation, max_bound_saturation)

  if augment_brightness:
    images = tf.image.random_brightness(images, max_delta_brightness)

  if augment_contrast:
    images = tf.image.random_contrast(
        images, min_bound_contrast, max_bound_contrast)

  if augment_gaussian_noise:
    sigma = tf.random.uniform([],
                              minval=min_bound_gaussian_noise,
                              maxval=max_bound_gaussian_noise,
                              dtype=tf.float32)
    noise = tf.random.normal(
        tf.shape(input=images), stddev=sigma, dtype=tf.float32)
    images = images + noise

  # perform relative photometric augmentation (individually per image)
  image_1, image_2 = tf.unstack(images)
  if augment_brightness_individual:
    image_1 = tf.image.random_brightness(
        image_1, max_delta_brightness_individual)
    image_2 = tf.image.random_brightness(
        image_2, max_delta_brightness_individual)
  if augment_contrast_individual:
    image_1 = tf.image.random_contrast(
        image_1, min_bound_contrast_individual, max_bound_contrast_individual)
    image_2 = tf.image.random_contrast(
        image_2, min_bound_contrast_individual, max_bound_contrast_individual)

  # clip values to ensure they stay in [0, 1] (some augmentations can violate
  # this)
  image_1 = tf.clip_by_value(image_1, 0.0, 1.0)
  image_2 = tf.clip_by_value(image_2, 0.0, 1.0)
  return tf.stack([image_1, image_2])
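# A minimal usage sketch for photometric_augmentation, assuming an image pair
# with values in [0, 1]; the random input below is a stand-in for real data.
import tensorflow as tf

images = tf.random.uniform([2, 64, 64, 3], minval=0.0, maxval=1.0)
augmented = photometric_augmentation(images)
print(augmented.shape)  # (2, 64, 64, 3), values clipped to [0, 1]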
def _non_nan_mean(tensor_list):
  """Calculates the mean of a list of tensors while ignoring nans."""
  tensor = tf.stack(tensor_list)
  not_nan = tf.logical_not(tf.math.is_nan(tensor))
  return tf.reduce_mean(tf.boolean_mask(tensor, not_nan))
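# A minimal usage sketch for _non_nan_mean: NaN entries are dropped before
# averaging, so the result below is 2.0 rather than NaN.
import tensorflow as tf

values = [tf.constant(1.0), tf.constant(float('nan')), tf.constant(3.0)]
print(_non_nan_mean(values).numpy())  # 2.0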
def classification_loss_using_mask_iou_func(embeddings,
                                            logits,
                                            instance_ids,
                                            class_labels,
                                            num_samples,
                                            valid_mask=None,
                                            max_instance_id=None,
                                            similarity_strategy='dotproduct',
                                            is_balanced=True):
  """Classification loss using mask iou.

  Args:
    embeddings: A tf.float32 tensor of size [batch_size, n, f].
    logits: A tf.float32 tensor of size [batch_size, n, num_classes]. It is
      assumed that background is class 0.
    instance_ids: A tf.int32 tensor of size [batch_size, n].
    class_labels: A tf.int32 tensor of size [batch_size, n]. It is assumed
      that the background voxels are assigned to class 0.
    num_samples: An int determining the number of samples.
    valid_mask: A tf.bool tensor of size [batch_size, n] that is True when an
      element is valid and False if it needs to be ignored. By default the
      value is None which means it is not applied.
    max_instance_id: If set, instance ids larger than that value will be
      ignored. If not set, it will be computed from instance_ids tensor.
    similarity_strategy: Defines the method for computing similarity between
      embedding vectors. Possible values are 'dotproduct' and 'distance'.
    is_balanced: If True, the per-voxel losses are re-weighted to have equal
      total weight for foreground vs. background voxels.

  Returns:
    A tf.float32 scalar loss tensor.
  """
  batch_size = embeddings.get_shape().as_list()[0]
  if batch_size is None:
    raise ValueError('Unknown batch size at graph construction time.')
  if max_instance_id is None:
    max_instance_id = tf.reduce_max(instance_ids)
  class_labels = tf.reshape(class_labels, [batch_size, -1, 1])
  sampled_embeddings, sampled_instance_ids, sampled_indices = (
      sampling_utils.balanced_sample(
          features=embeddings,
          instance_ids=instance_ids,
          num_samples=num_samples,
          valid_mask=valid_mask,
          max_instance_id=max_instance_id))
  losses = []
  for i in range(batch_size):
    embeddings_i = embeddings[i, :, :]
    instance_ids_i = instance_ids[i, :]
    class_labels_i = class_labels[i, :, :]
    logits_i = logits[i, :]
    sampled_embeddings_i = sampled_embeddings[i, :, :]
    sampled_instance_ids_i = sampled_instance_ids[i, :]
    sampled_indices_i = sampled_indices[i, :]
    sampled_class_labels_i = tf.gather(class_labels_i, sampled_indices_i)
    sampled_logits_i = tf.gather(logits_i, sampled_indices_i)
    if valid_mask is not None:
      valid_mask_i = valid_mask[i]
      embeddings_i = tf.boolean_mask(embeddings_i, valid_mask_i)
      instance_ids_i = tf.boolean_mask(instance_ids_i, valid_mask_i)
    loss_i = classification_loss_using_mask_iou_func_unbatched(
        embeddings=embeddings_i,
        instance_ids=instance_ids_i,
        sampled_embeddings=sampled_embeddings_i,
        sampled_instance_ids=sampled_instance_ids_i,
        sampled_class_labels=sampled_class_labels_i,
        sampled_logits=sampled_logits_i,
        similarity_strategy=similarity_strategy,
        is_balanced=is_balanced)
    losses.append(loss_i)
  return tf.math.reduce_mean(tf.stack(losses))
def prepare_lidar_images_and_correspondences(
    inputs,
    resized_image_height,
    resized_image_width,
    camera_names=('front', 'front_left', 'front_right', 'side_left',
                  'side_right'),
    lidar_names=('top', 'front', 'side_left', 'side_right', 'rear')):
  """Integrates and returns the lidars, cameras and their correspondences.

  Args:
    inputs: A dictionary containing the images and point / pixel
      correspondences.
    resized_image_height: Target height of the images.
    resized_image_width: Target width of the images.
    camera_names: List of cameras to include images from.
    lidar_names: List of lidars to include point clouds from.

  Returns:
    A tf.float32 tensor of size [num_points, 3] containing point positions.
    A tf.float32 tensor of size [num_points, 1] containing point intensities.
    A tf.float32 tensor of size [num_points, 1] containing point elongations.
    A tf.float32 tensor of size [num_points, 3] containing point normals.
    A tf.float32 tensor of size [num_images, resized_image_height,
      resized_image_width, 3].
    A tf.int32 tensor of size [num_images, num_points, 2].

  Raises:
    ValueError: If camera_names or lidar_names are empty lists.
  """
  if not camera_names:
    raise ValueError('camera_names should contain at least one name.')
  if not lidar_names:
    raise ValueError('lidar_names should contain at least one name.')

  (points_position, points_intensity, points_elongation, points_normal,
   points_in_image_frame_yx, points_in_image_frame_id) = _prepare_lidar_points(
       inputs=inputs, lidar_names=lidar_names)

  images = []
  points_in_image_frame = []
  for camera_name in camera_names:
    image_key = ('cameras/%s/image' % camera_name)
    image_height = tf.shape(inputs[image_key])[0]
    image_width = tf.shape(inputs[image_key])[1]
    height_ratio = tf.cast(
        resized_image_height, dtype=tf.float32) / tf.cast(
            image_height, dtype=tf.float32)
    width_ratio = tf.cast(
        resized_image_width, dtype=tf.float32) / tf.cast(
            image_width, dtype=tf.float32)
    if tf.executing_eagerly():
      resize_method = tf.image.ResizeMethod.NEAREST_NEIGHBOR
    else:
      resize_method = tf.image.ResizeMethod.BILINEAR
      if inputs[image_key].dtype in [
          tf.int8, tf.uint8, tf.int16, tf.uint16, tf.int32, tf.int64
      ]:
        resize_method = tf.image.ResizeMethod.NEAREST_NEIGHBOR
    images.append(
        tf.image.resize(
            images=inputs[image_key],
            size=[resized_image_height, resized_image_width],
            method=resize_method,
            antialias=True))
    camera_id = tf.cast(inputs[('cameras/%s/id' % camera_name)],
                        dtype=tf.int32)
    valid_points = tf.equal(points_in_image_frame_id, camera_id)
    valid_points = tf.tile(valid_points, [1, 2])
    point_coords = tf.cast(
        tf.cast(points_in_image_frame_yx, dtype=tf.float32) *
        tf.stack([height_ratio, width_ratio]),
        dtype=tf.int32)
    points_in_image_frame_camera = tf.where(
        valid_points, point_coords,
        -tf.ones_like(valid_points, dtype=tf.int32))
    points_in_image_frame.append(points_in_image_frame_camera)
  num_images = len(images)
  images = tf.stack(images, axis=0)
  images.set_shape([num_images, resized_image_height, resized_image_width, 3])
  points_in_image_frame = tf.stack(points_in_image_frame, axis=0)

  return {
      'points_position': points_position,
      'points_intensity': points_intensity,
      'points_elongation': points_elongation,
      'points_normal': points_normal,
      'view_images': {'rgb_view': images},
      'view_indices_2d': {'rgb_view': points_in_image_frame}
  }
def flip_points(points, x_rotate, y_rotate):
  return points * tf.stack((x_rotate, y_rotate, 1), axis=0)
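# A minimal usage sketch for flip_points with assumed inputs: x_rotate=-1.0
# mirrors the points across the y-z plane while y and z stay unchanged.
import tensorflow as tf

points = tf.constant([[1.0, 2.0, 3.0], [-4.0, 5.0, 6.0]])
flipped = flip_points(points, x_rotate=-1.0, y_rotate=1.0)
print(flipped.numpy())  # [[-1., 2., 3.], [4., 5., 6.]]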