def _scale_and_center_cloud(self, points, ids):
    """Scale each scene's points to the image cube and centre them in it.

    Object ids are grouped into scenes of ``self.num_objects_per_scene``
    consecutive ids. For every scene the min/max of its points is computed,
    the points are multiplied by a per-scene factor so that the largest span
    becomes ``self.img_size - 2.0``, and a per-scene shift then centres the
    scaled span inside that range.

    Args:
        points: Point coordinates; ``ids`` is used as the segment id of each
            entry along the leading dimension.
        ids: Integer object id of each entry of ``points``.

    Returns:
        The scaled and shifted points tensor.
    """
    objects_per_scene = self.num_objects_per_scene
    scene_count = tf.to_int32((tf.reduce_max(ids) // objects_per_scene) + 1)
    scene_of_point = tf.to_int32(ids // objects_per_scene)

    # Work in fixed point (4 decimals): unsorted_segment_{min, max} is faster
    # for int, so reduce on integers and convert back afterwards.
    fixed_point = tf.to_int32(points * 10000.)
    lower = tf.unsorted_segment_min(fixed_point, scene_of_point, scene_count)
    lower = tf.to_float(lower) / 10000.
    upper = tf.unsorted_segment_max(fixed_point, scene_of_point, scene_count)
    upper = tf.to_float(upper) / 10000.
    lower.set_shape(lower.shape.with_rank(1))
    upper.set_shape(upper.shape.with_rank(1))
    extent = upper - lower

    with tf.name_scope("scale_to_image_cube"):
        largest_extent = tf.reduce_max(extent, axis=-1, keepdims=True)
        scale = tf.divide(
            (self.img_size - 2.0), largest_extent, name="scale_factor")
        per_point_scale = tf.gather(scale, scene_of_point)
        points = points * per_point_scale

    with tf.name_scope("center_objects"):
        # Move the scaled minimum to zero, then add half of the space the
        # scaled extent leaves unused so the content sits in the middle.
        scaled_lower = -lower * scale
        unused_half = ((self.img_size - 2.0) - extent * scale) / 2
        shift = scaled_lower + unused_half
        per_point_shift = tf.gather(shift, scene_of_point)
        points = points + per_point_shift

    return points
def make_model(self, mode):
    """Add the element-selection output head, loss and metrics to the model.

    After the parent model is built, every selectable element is described by
    the concatenation of three vectors: its own node representation, the
    pooled representation of its graph with the pooled elements subtracted,
    and the pooled representation of the elements of its graph. A classifier
    turns that into per-element logits which are trained with sparse softmax
    cross entropy against ``placeholders['select_targets']``.

    Populates ``self.ops`` with ``final_classifier``, ``loss``,
    ``probabilities``, ``accuracy_task``, ``preds`` and ``probs``.

    Args:
        mode: Forwarded unchanged to the parent ``make_model``.
    """
    super().make_model(mode)

    # NOTE(review): only the classifier construction is assumed to live in
    # the "out_layer" variable scope - confirm against the original layout.
    with tf.variable_scope("out_layer"):
        self.ops['final_classifier'] = self.final_classifier()

    element_ids = self.placeholders['elements']
    element_graph_ids = self.placeholders['elem_graph_nodes_list']
    graph_count = self.placeholders['num_graphs_in_batch']

    # Representation of each element, and their per-graph sum.
    elements_repr = tf.gather(
        params=self.ops['final_node_representations'],
        indices=element_ids)
    pooled_elements = tf.unsorted_segment_sum(
        data=elements_repr,
        segment_ids=element_graph_ids,
        num_segments=graph_count)

    # Per-graph pooled representation with the element contribution removed.
    pooled_all = self.prepare_pooled_node_representations()
    pooled_rest = pooled_all - pooled_elements

    # Broadcast both graph-level vectors to nodes, then pick the elements.
    node_graph_ids = self.placeholders['graph_nodes_list']
    rest_per_node = tf.gather(params=pooled_rest, indices=node_graph_ids)
    elements_per_node = tf.gather(
        params=pooled_elements, indices=node_graph_ids)
    rest_per_element = tf.gather(params=rest_per_node, indices=element_ids)
    pooled_per_element = tf.gather(
        params=elements_per_node, indices=element_ids)

    classifier_input = tf.concat(
        [elements_repr, rest_per_element, pooled_per_element], -1)
    logits = self.ops['final_classifier'](classifier_input)

    # Subsets can be seen as independent binary classification over the
    # elements.
    targets = self.placeholders['select_targets']
    per_element_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=targets)
    self.ops['loss'] = tf.reduce_mean(per_element_loss)

    probabilities = tf.nn.softmax(logits)
    self.ops['probabilities'] = probabilities

    # A graph counts as correct only if every one of its elements is
    # predicted correctly, hence the segment-wise minimum of the 0/1 hits.
    element_hits = tf.cast(
        tf.equal(tf.argmax(probabilities, -1), targets), "float")
    graph_hits = tf.unsorted_segment_min(
        data=element_hits,
        segment_ids=element_graph_ids,
        num_segments=graph_count)
    self.ops['accuracy_task'] = tf.reduce_mean(graph_hits)

    best_two = tf.nn.top_k(probabilities, k=2)
    self.ops['preds'] = best_two.indices
    self.ops['probs'] = best_two.values
def message_passing(state, read_ids, write_ids, node_count, name):
    """Gather rows of ``state`` and aggregate them per target node.

    Rows are looked up with ``read_ids`` and reduced into ``node_count``
    segments selected by ``write_ids`` using sum, max and min.

    Args:
        state: Tensor the messages are read from.
        read_ids: Indices into ``state`` of the rows to send.
        write_ids: Segment id (receiving node) for each looked-up row.
        node_count: Number of output segments.
        name: Suffix appended to the names of the created ops.

    Returns:
        A list ``[sum, max, min]`` of the aggregated tensors, where the max
        is bounded below by -1.0 and the min is bounded above by 1.0.
    """
    messages = tf.nn.embedding_lookup(
        params=state, ids=read_ids, name="lookup_" + name)

    summed = tf.unsorted_segment_sum(
        data=messages,
        segment_ids=write_ids,
        num_segments=node_count,
        name="segment_sum_" + name)

    # The clamps keep the extreme fill values of the max/min reductions
    # from propagating (presumably those of segments with no messages).
    highest = tf.maximum(
        tf.unsorted_segment_max(
            data=messages,
            segment_ids=write_ids,
            num_segments=node_count,
            name="segment_max_" + name),
        -1.0)

    lowest = tf.minimum(
        tf.unsorted_segment_min(
            data=messages,
            segment_ids=write_ids,
            num_segments=node_count,
            name="segment_min_" + name),
        1.0)

    return [summed, highest, lowest]
def _project_to_plane(self, axis_mapping, points, object_ids):
    """Render the points as per-object depth images on one image plane.

    Every point is assigned to a segment by ``self._compute_segment_ids``;
    the smallest (shifted) depth value that lands in a segment becomes that
    segment's value. Segments whose value is at least 255.0 are set to 0.

    Args:
        axis_mapping: Mapping object; its ``img_to_world_axis_name['d']``
            names the world axis used as depth. A ``"-"`` in that name makes
            the depth values go through ``self.invert``.
        points: Indexable by axis index; the selected entry holds the depth
            coordinate of every point.
        object_ids: Object id of every point.

    Returns:
        Tensor reshaped to ``(-1, 2, img_size, img_size)``.
    """
    pixel_ids = self._compute_segment_ids(points, axis_mapping, object_ids)

    with tf.name_scope("depth_values"):
        depth_axis_name = axis_mapping.img_to_world_axis_name['d']
        depth = points[axis_index(depth_axis_name)]
        if "-" in depth_axis_name:
            depth = self.invert(depth)
        depth += self.values_shift

    # One segment per pixel of every object's image.
    object_count = tf.reduce_max(object_ids) + 1
    pixel_count = tf.to_int32(np.square(self.img_size) * object_count)

    pixel_ids.set_shape(pixel_ids.shape.with_rank(1))
    depth.set_shape(depth.shape.with_rank(1))
    nearest = tf.unsorted_segment_min(
        depth, pixel_ids, num_segments=pixel_count)
    nearest.set_shape(nearest.shape.with_rank(1))

    # set empty pixels to 0 (presumably they hold the reduction's large
    # fill value, which is what the 255.0 threshold catches)
    is_empty = nearest >= 255.0
    nearest = tf.where(is_empty, tf.zeros_like(nearest), nearest)

    side = int(self.img_size)
    return tf.reshape(nearest, (-1, 2, side, side))
def merge_boxes_with_multiple_labels(boxes,
                                     classes,
                                     confidences,
                                     num_classes,
                                     quantization_bins=10000):
    """Merges boxes with same coordinates and returns K-hot encoded classes.

    Box coordinates are quantized and folded into one integer key per box;
    boxes with equal keys are merged into a single output box whose class
    and confidence rows collect the labels of all boxes sharing that key.

    Args:
      boxes: A tf.float32 tensor with shape [N, 4] holding N boxes. Only
        normalized coordinates are allowed.
      classes: A tf.int32 tensor with shape [N] holding class indices.
        The class index starts at 0.
      confidences: A tf.float32 tensor with shape [N] holding class
        confidences.
      num_classes: total number of classes to use for K-hot encoding.
      quantization_bins: the number of bins used to quantize the box
        coordinate.

    Returns:
      merged_boxes: A tf.float32 tensor with shape [N', 4] holding boxes,
        where N' <= N.
      class_encodings: A tf.int32 tensor with shape [N', num_classes] holding
        K-hot encodings for the merged boxes.
      confidence_encodings: A tf.float32 tensor with shape [N', num_classes]
        holding encodings of confidences for the merged boxes.
      merged_box_indices: A tf.int32 tensor with shape [N'] holding original
        indices of the boxes.
    """
    shape_of_boxes = tf.shape(boxes)
    shape_of_classes = tf.shape(classes)
    shape_of_confidences = tf.shape(confidences)

    # Input checks that must run before any of the ops below.
    preconditions = [
        shape_utils.assert_shape_equal_along_first_dimension(
            shape_of_boxes, shape_of_classes),
        shape_utils.assert_shape_equal_along_first_dimension(
            shape_of_boxes, shape_of_confidences),
        tf.assert_equal(shape_of_boxes[1], 4),
        shape_utils.assert_box_normalized(boxes),
    ]

    with tf.control_dependencies(preconditions):
        quantized = tf.to_int64(boxes * (quantization_bins - 1))
        q_ymin, q_xmin, q_ymax, q_xmax = tf.unstack(quantized, axis=1)

        # Base-`quantization_bins` key with digits (xmax, ymax, xmin, ymin),
        # i.e. ymin + xmin * B + ymax * B^2 + xmax * B^3 in nested form.
        box_keys = q_xmax
        for digit in (q_ymax, q_xmin, q_ymin):
            box_keys = box_keys * quantization_bins + digit

        distinct_keys, key_slot_per_box = tf.unique(box_keys)
        box_count = tf.shape(boxes)[0]
        merged_count = tf.shape(distinct_keys)[0]

        # The first original box of each key represents the merged box.
        merged_box_indices = tf.unsorted_segment_min(
            tf.range(box_count), key_slot_per_box, merged_count)
        merged_boxes = tf.gather(boxes, merged_box_indices)

        def encode_merged_box(i):
            """Produces the K-hot and confidence rows of merged box `i`."""
            belongs_to_box = tf.equal(
                key_slot_per_box, i * tf.ones(box_count, dtype=tf.int32))
            belongs_to_box = tf.reshape(belongs_to_box, [-1])
            member_classes = tf.boolean_mask(classes, belongs_to_box)
            member_confidences = tf.boolean_mask(confidences, belongs_to_box)
            k_hot_row = tf.sparse_to_dense(
                member_classes, [num_classes], 1, validate_indices=False)
            confidence_row = tf.sparse_to_dense(
                member_classes, [num_classes], member_confidences,
                validate_indices=False)
            return k_hot_row, confidence_row

        class_encodings, confidence_encodings = tf.map_fn(
            encode_merged_box,
            tf.range(merged_count),
            back_prop=False,
            dtype=(tf.int32, tf.float32))

        # Pin the static ranks/trailing dimensions of the outputs.
        merged_boxes = tf.reshape(merged_boxes, [-1, 4])
        class_encodings = tf.reshape(class_encodings, [-1, num_classes])
        confidence_encodings = tf.reshape(
            confidence_encodings, [-1, num_classes])
        merged_box_indices = tf.reshape(merged_box_indices, [-1])
        return (merged_boxes, class_encodings, confidence_encodings,
                merged_box_indices)
def load_proj_bch(camera_paths, pcl_xyz_paths, pcl_sift_paths, pcl_rgb_paths,
                  crsz, scsz, isval=False, niter=0):
    """Project a batch of point clouds into depth / SIFT / RGB images.

    For every sample the camera and point-cloud files are loaded, the points
    are projected with ``K [R | T]``, points outside the image or behind the
    camera are dropped, and for every pixel only the point nearest to the
    camera is kept. The kept points are scattered into dense images, which
    are then cropped and, unless ``isval`` is set, randomly flipped
    left/right.

    Args:
        camera_paths: One camera file path per sample.
        pcl_xyz_paths: One point-position file path per sample.
        pcl_sift_paths: One SIFT-descriptor file path per sample.
        pcl_rgb_paths: One point-colour file path per sample.
        crsz: Side length of the square crop taken from each image.
        scsz: Forwarded to ``scale_crop``.
        isval: Forwarded to ``scale_crop``; when true the random flip is
            skipped.
        niter: Seed used for the random crop positions and the flips.

    Returns:
        Three lists ``(depth, sift, rgb)`` with one image tensor per sample.
    """
    bsz = len(camera_paths)
    proj_depth_batch = []
    proj_sift_batch = []
    proj_rgb_batch = []
    # Fill value unsorted_segment_min leaves in int32 segments with no entry.
    INT32_MAX = 2147483647

    crxy = tf.random_uniform([bsz, 2], minval=0., maxval=1., seed=niter)
    for i in range(bsz):
        # load data from files
        K, R, T, w, h = load_camera(camera_paths[i])
        pcl_xyz = load_bin_file(pcl_xyz_paths[i], tf.float32, [-1, 3])
        pcl_sift = tf.cast(
            load_bin_file(pcl_sift_paths[i], tf.uint8, [-1, 128]), tf.float32)
        pcl_rgb = tf.cast(
            load_bin_file(pcl_rgb_paths[i], tf.uint8, [-1, 3]), tf.float32)
        sc, _, cry, crx = scale_crop(h, w, crxy[i], crsz, scsz, isval, niter)

        # project pcl (homogeneous world coordinates -> image plane)
        P = tf.matmul(K, tf.concat((R, T), axis=1))
        xyz_world = tf.concat(
            (pcl_xyz, tf.ones([tf.shape(pcl_xyz)[0], 1])), axis=1)
        xyz_proj = tf.transpose(tf.matmul(P, tf.transpose(xyz_world)))
        z = xyz_proj[:, 2]
        x = xyz_proj[:, 0] / z
        y = xyz_proj[:, 1] / z

        # keep points inside the image bounds and in front of the camera
        mask_x = tf.logical_and(tf.greater(x, -1.), tf.less(x, tf.to_float(w)))
        mask_y = tf.logical_and(tf.greater(y, -1.), tf.less(y, tf.to_float(h)))
        mask_z = tf.logical_and(tf.greater(z, 0.),
                                tf.logical_not(tf.is_nan(z)))
        mask = tf.logical_and(mask_z, tf.logical_and(mask_x, mask_y))

        proj_x = tf.boolean_mask(x, mask)
        proj_y = tf.boolean_mask(y, mask)
        proj_z = tf.boolean_mask(z, mask)
        proj_depth = tf.expand_dims(proj_z, axis=1)
        proj_sift = tf.boolean_mask(pcl_sift, mask, axis=0)
        proj_rgb = tf.boolean_mask(pcl_rgb, mask, axis=0)

        # scale pcl
        proj_x = tf.round(proj_x * sc)
        proj_y = tf.round(proj_y * sc)
        proj_z = proj_z * sc
        h *= sc
        w *= sc

        #################
        # sort proj tensor by depth, nearest point first (top_k of -z), so
        # that a smaller position in the sorted order means a closer point
        _, inds_global_sort = tf.nn.top_k(-1. * proj_z, k=tf.shape(proj_z)[0])
        proj_x = tf.gather(proj_x, inds_global_sort)
        proj_y = tf.gather(proj_y, inds_global_sort)

        # per pixel depth buffer: the minimum sorted position inside each
        # pixel segment is the nearest point of that pixel
        # NOTE(review): x * w + y is only unique per pixel while y < w -
        # confirm this holds for images that are taller than wide.
        seg_ids = tf.cast(proj_x * tf.cast(w, tf.float32) + proj_y, tf.int32)
        data = tf.range(tf.shape(seg_ids)[0])
        # Segment ids must lie in [0, num_segments), so the count is the
        # largest id plus one; clamp to 0 so an empty cloud stays valid.
        num_pix_segments = tf.maximum(tf.reduce_max(seg_ids) + 1, 0)
        inds_pix_sort = tf.unsorted_segment_min(data, seg_ids,
                                                num_pix_segments)
        # drop the segments (pixels) that received no point
        inds_pix_sort = tf.boolean_mask(inds_pix_sort,
                                        tf.less(inds_pix_sort, INT32_MAX))

        proj_depth = tf.gather(tf.gather(proj_depth, inds_global_sort),
                               inds_pix_sort)
        proj_sift = tf.gather(tf.gather(proj_sift, inds_global_sort),
                              inds_pix_sort)
        proj_rgb = tf.gather(tf.gather(proj_rgb, inds_global_sort),
                             inds_pix_sort)

        # scatter the surviving points into dense [h, w, channels] images
        h = tf.cast(h, tf.int32)
        w = tf.cast(w, tf.int32)
        proj_yx = tf.cast(
            tf.concat((proj_y[:, None], proj_x[:, None]), axis=1), tf.int32)
        proj_yx = tf.gather(proj_yx, inds_pix_sort)
        proj_depth = tf.scatter_nd(proj_yx, proj_depth, [h, w, 1])
        proj_sift = tf.scatter_nd(proj_yx, proj_sift, [h, w, 128])
        proj_rgb = tf.scatter_nd(proj_yx, proj_rgb, [h, w, 3])
        ################

        # crop proj
        proj_depth = proj_depth[cry:cry + crsz, crx:crx + crsz, :]
        proj_sift = proj_sift[cry:cry + crsz, crx:crx + crsz, :]
        proj_rgb = proj_rgb[cry:cry + crsz, crx:crx + crsz, :]

        # randomly flip proj (same seed for all three images)
        if not isval:
            proj_depth = tf.image.random_flip_left_right(proj_depth,
                                                         seed=niter)
            proj_sift = tf.image.random_flip_left_right(proj_sift, seed=niter)
            proj_rgb = tf.image.random_flip_left_right(proj_rgb, seed=niter)

        proj_depth_batch.append(proj_depth)
        proj_rgb_batch.append(proj_rgb)
        proj_sift_batch.append(proj_sift)

    return proj_depth_batch, proj_sift_batch, proj_rgb_batch
def merge_boxes_with_multiple_labels(boxes,
                                     classes,
                                     confidences,
                                     num_classes,
                                     quantization_bins=10000):
    """Merges boxes with same coordinates and returns K-hot encoded classes.

    Box coordinates are quantized and folded into one integer key per box;
    boxes with equal keys are merged into a single output box whose class
    and confidence rows collect the labels of all boxes sharing that key.

    Args:
      boxes: A tf.float32 tensor with shape [N, 4] holding N boxes. Only
        normalized coordinates are allowed.
      classes: A tf.int32 tensor with shape [N] holding class indices.
        The class index starts at 0.
      confidences: A tf.float32 tensor with shape [N] holding class
        confidences.
      num_classes: total number of classes to use for K-hot encoding.
      quantization_bins: the number of bins used to quantize the box
        coordinate.

    Returns:
      merged_boxes: A tf.float32 tensor with shape [N', 4] holding boxes,
        where N' <= N.
      class_encodings: A tf.int32 tensor with shape [N', num_classes] holding
        K-hot encodings for the merged boxes.
      confidence_encodings: A tf.float32 tensor with shape [N', num_classes]
        holding encodings of confidences for the merged boxes.
      merged_box_indices: A tf.int32 tensor with shape [N'] holding original
        indices of the boxes.
    """
    shape_of_boxes = tf.shape(boxes)
    shape_of_classes = tf.shape(classes)
    shape_of_confidences = tf.shape(confidences)

    # Input checks that must run before any of the ops below.
    preconditions = [
        shape_utils.assert_shape_equal_along_first_dimension(
            shape_of_boxes, shape_of_classes),
        shape_utils.assert_shape_equal_along_first_dimension(
            shape_of_boxes, shape_of_confidences),
        tf.assert_equal(shape_of_boxes[1], 4),
        shape_utils.assert_box_normalized(boxes),
    ]

    with tf.control_dependencies(preconditions):
        quantized = tf.to_int64(boxes * (quantization_bins - 1))
        q_ymin, q_xmin, q_ymax, q_xmax = tf.unstack(quantized, axis=1)

        # Base-`quantization_bins` key with digits (xmax, ymax, xmin, ymin),
        # i.e. ymin + xmin * B + ymax * B^2 + xmax * B^3 in nested form.
        box_keys = q_xmax
        for digit in (q_ymax, q_xmin, q_ymin):
            box_keys = box_keys * quantization_bins + digit

        distinct_keys, key_slot_per_box = tf.unique(box_keys)
        box_count = tf.shape(boxes)[0]
        merged_count = tf.shape(distinct_keys)[0]

        # The first original box of each key represents the merged box.
        merged_box_indices = tf.unsorted_segment_min(
            tf.range(box_count), key_slot_per_box, merged_count)
        merged_boxes = tf.gather(boxes, merged_box_indices)

        def encode_merged_box(i):
            """Produces the K-hot and confidence rows of merged box `i`."""
            belongs_to_box = tf.equal(
                key_slot_per_box, i * tf.ones(box_count, dtype=tf.int32))
            belongs_to_box = tf.reshape(belongs_to_box, [-1])
            member_classes = tf.boolean_mask(classes, belongs_to_box)
            member_confidences = tf.boolean_mask(confidences, belongs_to_box)
            k_hot_row = tf.sparse_to_dense(
                member_classes, [num_classes], 1, validate_indices=False)
            confidence_row = tf.sparse_to_dense(
                member_classes, [num_classes], member_confidences,
                validate_indices=False)
            return k_hot_row, confidence_row

        class_encodings, confidence_encodings = tf.map_fn(
            encode_merged_box,
            tf.range(merged_count),
            back_prop=False,
            dtype=(tf.int32, tf.float32))

        # Pin the static ranks/trailing dimensions of the outputs.
        merged_boxes = tf.reshape(merged_boxes, [-1, 4])
        class_encodings = tf.reshape(class_encodings, [-1, num_classes])
        confidence_encodings = tf.reshape(
            confidence_encodings, [-1, num_classes])
        merged_box_indices = tf.reshape(merged_box_indices, [-1])
        return (merged_boxes, class_encodings, confidence_encodings,
                merged_box_indices)