def network_forward(self, point_cloud, bn_decay, img_input):
    """Build the forward graph: per-point image features, backbone layers, heads.

    Args:
        point_cloud: point tensor whose last axis holds xyz in channels 0-2
            and any extra per-point features from channel 3 onwards.
        bn_decay: forwarded unchanged to every layer and head.
        img_input: image batch handed to ``self.build_img_extractor``.

    Returns:
        None. Results are accumulated in ``self.output`` by the layers, the
        heads and ``merge_head_prediction``.
    """
    l0_xyz = tf.slice(point_cloud, [0, 0, 0], [-1, -1, 3])
    l0_points = tf.slice(point_cloud, [0, 0, 3], [-1, -1, -1])
    num_point = l0_xyz.get_shape().as_list()[1]

    img_feature_maps = self.build_img_extractor(img_input)

    # Project every 3D point into the image plane and truncate to pixels.
    pts2d = projection.tf_rect_to_image(
        l0_xyz, self.placeholders[maps_dict.PL_CALIB_P2])
    pts2d = tf.cast(pts2d, tf.int32)  # (B,N,2)

    # Batch id of every flattened point. The pixel coordinates below are
    # flattened batch-major (all N points of sample 0, then sample 1, ...),
    # so the ids must read 0 x N, 1 x N, ...  Tiling tf.range(B) directly
    # would give 0,1,..,B-1,0,1,.. and pair points with the wrong image
    # whenever batch_size > 1.
    batch_ids = tf.reshape(
        tf.tile(tf.expand_dims(tf.range(0, self.batch_size), axis=-1),
                [1, num_point]),
        [self.batch_size * num_point, 1])  # (B*N,1)
    flat_pixels = tf.reshape(pts2d, [self.batch_size * num_point, 2])
    indices = tf.concat([batch_ids, flat_pixels], axis=-1)  # (B*N,3)
    # Swap the two pixel columns: the image's shape is (y,x).
    indices = tf.gather(indices, [0, 2, 1], axis=-1)
    # NOTE(review): pixel indices are not clipped to the feature-map extent;
    # confirm upstream guarantees every point projects inside the image.
    point_img_feats = tf.reshape(
        tf.gather_nd(img_feature_maps, indices),  # (B*N,C)
        [self.batch_size, num_point, -1])  # (B,N,C)

    xyz_list = [l0_xyz]
    feature_list = [l0_points]
    fps_idx_list = [None]
    point_img_feats_list = [point_img_feats]
    for layer in self.layers:
        xyz_list, feature_list, fps_idx_list, point_img_feats_list = \
            layer.build_layer(xyz_list, feature_list, fps_idx_list, bn_decay,
                              self.output, point_img_feats_list)

    # Heads append to self.output; remember where their entries start so
    # they can be merged afterwards.
    cur_head_start_idx = len(self.output[maps_dict.KEY_OUTPUT_XYZ])
    for head in self.heads:
        head.build_layer(xyz_list, feature_list, bn_decay, self.output)
    merge_head_prediction(cur_head_start_idx, self.output, self.prediction_keys)
def get_seg_softmax(self):
    """Sample the image segmentation prediction at each point's pixel.

    Reads the tensor ``'deeplab_v3/SemanticPredictions:0'`` from
    ``self.graph`` and looks it up at the image projection of every point in
    ``self.placeholders['pointclouds']``.

    Returns:
        Tensor reshaped to ``[batch_size, num_point, -1]`` holding the
        per-point values gathered from the segmentation tensor.
    """
    point_cloud = self.placeholders['pointclouds']
    img_seg = self.graph.get_tensor_by_name(
        'deeplab_v3/SemanticPredictions:0')  # (B,360,1200,NUM_CLASSES)

    pts2d = projection.tf_rect_to_image(
        tf.slice(point_cloud, [0, 0, 0], [-1, -1, 3]),
        self.placeholders['calib'])
    pts2d = tf.cast(pts2d, tf.int32)  # (B,N,2)

    total_points = self.batch_size * self.num_point
    # Batch ids must follow the batch-major order of the flattened pixels
    # (0 x N, 1 x N, ...). Tiling tf.range(B) directly yields 0,1,..,B-1
    # repeated, which mismatches points and images for batch_size > 1.
    batch_ids = tf.reshape(
        tf.tile(tf.expand_dims(tf.range(0, self.batch_size), axis=-1),
                [1, self.num_point]),
        [total_points, 1])  # (B*N,1)
    indices = tf.concat(
        [batch_ids, tf.reshape(pts2d, [total_points, 2])],
        axis=-1)  # (B*N,3)
    # Swap the two pixel columns: the image's shape is (y,x).
    indices = tf.gather(indices, [0, 2, 1], axis=-1)

    point_softmax = tf.reshape(
        tf.gather_nd(img_seg, indices),  # (B*N,NUM_CLASSES)
        [self.batch_size, self.num_point, -1])  # (B,N,NUM_CLASSES)
    return point_softmax
def build(self):
    """Build the segmentation + region-proposal graph.

    Steps, in order: extract image feature maps, gather one image feature
    vector per point, run the point segmentation net, mask foreground
    points, run the region proposal net, and parse/reduce the proposals.

    Returns:
        The ``end_points`` dict (``self.end_points``) after it has been
        updated by each stage; it also carries ``'gt_box_of_point'`` for
        IoU evaluation.
    """
    point_cloud = self.placeholders['pointclouds']
    is_training = self.placeholders['is_training_pl']
    mask_label = self.placeholders['seg_labels']
    bn_decay = self.bn_decay
    end_points = self.end_points

    img_feature_maps = self.build_img_extractor()  # (B,360,1200,C)

    pts2d = projection.tf_rect_to_image(
        tf.slice(point_cloud, [0, 0, 0], [-1, -1, 3]),
        self.placeholders['calib'])
    pts2d = tf.cast(pts2d, tf.int32)  # (B,N,2)

    total_points = self.batch_size * self.num_point
    # Batch ids must follow the batch-major order of the flattened pixels
    # (0 x N, 1 x N, ...). Tiling tf.range(B) directly yields 0,1,..,B-1
    # repeated, which mismatches points and images for batch_size > 1.
    batch_ids = tf.reshape(
        tf.tile(tf.expand_dims(tf.range(0, self.batch_size), axis=-1),
                [1, self.num_point]),
        [total_points, 1])  # (B*N,1)
    indices = tf.concat(
        [batch_ids, tf.reshape(pts2d, [total_points, 2])],
        axis=-1)  # (B*N,3)
    # Swap the two pixel columns: the image's shape is (y,x).
    indices = tf.gather(indices, [0, 2, 1], axis=-1)
    end_points['point_img_feats'] = tf.reshape(
        tf.gather_nd(img_feature_maps, indices),  # (B*N,C)
        [self.batch_size, self.num_point, -1])  # (B,N,C)

    end_points = self.get_segmentation_net(point_cloud, is_training,
                                           bn_decay, end_points)

    seg_softmax = tf.nn.softmax(end_points['foreground_logits'], axis=-1)
    # While training, mask with the one-hot ground-truth labels; otherwise
    # mask with the predicted softmax.
    seg_logits = tf.cond(
        is_training,
        lambda: tf.one_hot(mask_label, NUM_SEG_CLASSES),
        lambda: seg_softmax)

    # fg_point_feats include xyz
    fg_point_feats, end_points = point_cloud_masking(
        end_points['point_feats'], seg_logits, end_points,
        xyz_only=False)  # BxNUM_FG_POINTxD
    proposals = self.get_region_proposal_net(fg_point_feats, is_training,
                                             bn_decay, end_points)
    proposals_reshaped = tf.reshape(
        proposals, [self.batch_size, NUM_FG_POINT, -1])

    # Parse output to 3D box parameters
    end_points = self.parse_output_to_tensors(proposals_reshaped, end_points)
    end_points = self.reduce_proposals(end_points)

    # for iou eval
    end_points['gt_box_of_point'] = tf.gather_nd(
        self.placeholders['gt_box_of_point'],
        end_points['fg_point_indices'])
    end_points['gt_box_of_point'].set_shape(
        [self.batch_size, NUM_FG_POINT, 8, 3])
    return end_points
# Debug loop: pull one sample at a time, project its points into the image
# and print the per-axis pixel extremes, then start building gather indices.
# NOTE(review): `is_last_batch` is not checked anywhere in the visible part
# of this loop and the only `break` is commented out -- confirm the loop is
# terminated by code that follows this fragment.
while (True):
    batch_data, is_last_batch = dataset.get_next_batch(1, need_id=True)
    # NOTE(review): a new Session is opened on every iteration and new ops
    # are added to the graph each time -- presumably acceptable for a
    # throwaway debugging script; verify.
    with tf.Session() as sess:
        # NOTE(review): img_vgg is not used within the visible fragment.
        img_vgg = ImgVggPyr(
            VGG_config(
                **{
                    'vgg_conv1': [2, 32],
                    'vgg_conv2': [2, 64],
                    'vgg_conv3': [3, 128],
                    'vgg_conv4': [3, 256],
                    'l2_weight_decay': 0.0005
                }))
        print(batch_data['calib'])
        pts2d = projection.tf_rect_to_image(
            tf.slice(batch_data['pointcloud'], [0, 0, 0], [-1, -1, 3]),
            batch_data['calib'])
        pts2d = tf.cast(pts2d, tf.int32)  # (B,N,2)
        p2d = sess.run(pts2d)
        # Largest / smallest pixel coordinate over the point axis.
        print(np.amax(p2d, axis=1))
        print(np.amin(p2d, axis=1))
        # break
        # Batch column is all zeros here (tf.range(0, 1) tiled npoints
        # times), which matches the hard-coded batch size of 1.
        indices = tf.concat(
            [
                tf.expand_dims(tf.tile(tf.range(0, 1), [npoints]),
                               axis=-1),  # (B*N, 1)
                tf.reshape(pts2d, [1 * npoints, 2])
            ],
            axis=-1)  # (B*N,3)
        # Swap the two pixel columns: the image's shape is (y,x).
        indices = tf.gather(indices, [0, 2, 1], axis=-1)
def build(self):
    """Build the image-only per-point segmentation graph.

    Runs a VGG-pyramid image extractor on ``self.placeholders['img_inputs']``,
    gathers one image feature vector per point at the point's projected
    pixel, and classifies it with two 1x1 conv1d layers.

    Side effects:
        Sets ``self._img_pixel_size``, ``self._img_feature_extractor``,
        ``self._img_preprocessed``, ``self.img_feature_maps``,
        ``self.img_end_points``, and writes ``'point_img_feats'`` and
        ``'foreground_logits'`` into ``self.end_points``.

    Returns:
        None.
    """
    point_cloud = self.placeholders['pointclouds']
    self._img_pixel_size = np.asarray([360, 1200])
    bn_decay = self.bn_decay
    is_training = self.placeholders['is_training_pl']

    VGG_config = namedtuple(
        'VGG_config',
        'vgg_conv1 vgg_conv2 vgg_conv3 vgg_conv4 l2_weight_decay')
    self._img_feature_extractor = ImgVggPyr(
        VGG_config(
            vgg_conv1=[2, 32],
            vgg_conv2=[2, 64],
            vgg_conv3=[3, 128],
            vgg_conv4=[3, 256],
            l2_weight_decay=0.0005))

    self._img_preprocessed = self._img_feature_extractor.preprocess_input(
        self.placeholders['img_inputs'], self._img_pixel_size)
    # NOTE(review): this call uses self.is_training while the rest of the
    # method uses the 'is_training_pl' placeholder -- confirm both refer to
    # the same flag.
    self.img_feature_maps, self.img_end_points = \
        self._img_feature_extractor.build(
            self._img_preprocessed, self._img_pixel_size, self.is_training)

    # A 1x1 slim.conv2d 'bottleneck' segmentation head on the image feature
    # maps was previously disabled at this point (it was inert code).

    pts2d = projection.tf_rect_to_image(
        tf.slice(point_cloud, [0, 0, 0], [-1, -1, 3]),
        self.placeholders['calib'])
    pts2d = tf.cast(pts2d, tf.int32)  # (B,N,2)

    total_points = self.batch_size * self.num_point
    # Batch ids must follow the batch-major order of the flattened pixels
    # (0 x N, 1 x N, ...). Tiling tf.range(B) directly yields 0,1,..,B-1
    # repeated, which mismatches points and images for batch_size > 1.
    batch_ids = tf.reshape(
        tf.tile(tf.expand_dims(tf.range(0, self.batch_size), axis=-1),
                [1, self.num_point]),
        [total_points, 1])  # (B*N,1)
    indices = tf.concat(
        [batch_ids, tf.reshape(pts2d, [total_points, 2])],
        axis=-1)  # (B*N,3)
    # Swap the two pixel columns: the image's shape is (y,x).
    indices = tf.gather(indices, [0, 2, 1], axis=-1)
    self.end_points['point_img_feats'] = tf.reshape(
        tf.gather_nd(self.img_feature_maps, indices),  # (B*N,C)
        [self.batch_size, self.num_point, -1])  # (B,N,C)

    net = tf_util.conv1d(self.end_points['point_img_feats'], 128, 1,
                         padding='VALID', bn=True, is_training=is_training,
                         scope='img-seg-conv1d-fc1', bn_decay=bn_decay)
    net = tf_util.dropout(net, keep_prob=0.7, is_training=is_training,
                          scope='img-seg-dp1')
    logits = tf_util.conv1d(net, NUM_SEG_CLASSES, 1, padding='VALID',
                            activation_fn=None, scope='img-seg-conv1d-fc2')
    self.end_points['foreground_logits'] = logits
def network_forward(self, point_cloud, bn_decay, img_input, img_full_seg):
    """Build the forward graph using an image segmentation mask as a prior.

    Each point is assigned the image-segmentation label found at its
    projected pixel; points whose label equals the target class
    (``self.cls_list[0]``) are gathered and passed, together with features
    propagated back to the input points, into every layer's ``build_layer``.

    Args:
        point_cloud: point tensor whose last axis holds xyz in channels 0-2
            and any extra per-point features from channel 3 onwards.
        bn_decay: forwarded unchanged to the layers, heads and
            feature-propagation modules.
        img_input: image batch; resized to 360x1200 before use.
        img_full_seg: image segmentation labels; reshaped here to
            ``[batch_size, 360, 1200, 1]``.

    Returns:
        None. Results are accumulated in ``self.output``.

    Raises:
        ValueError: if ``self.cls_list[0]`` is not 'Car', 'Pedestrian' or
            'Cyclist'.
    """
    l0_xyz = tf.slice(point_cloud, [0, 0, 0], [-1, -1, 3])
    l0_points = tf.slice(point_cloud, [0, 0, 3], [-1, -1, -1])
    num_point = l0_xyz.get_shape().as_list()[1]
    batch_size = l0_xyz.get_shape().as_list()[0]

    img_full_seg = tf.reshape(img_full_seg, [batch_size, 360, 1200, 1])

    pts2d = projection.tf_rect_to_image(
        l0_xyz, self.placeholders[maps_dict.PL_CALIB_P2])
    pts2d = tf.cast(pts2d, tf.int32)  # (B,N,2)

    total_points = self.batch_size * num_point
    # Batch ids must follow the batch-major order of the flattened pixels
    # (0 x N, 1 x N, ...). Tiling tf.range(B) directly yields 0,1,..,B-1
    # repeated, which mismatches points and images for batch_size > 1.
    batch_ids = tf.reshape(
        tf.tile(tf.expand_dims(tf.range(0, self.batch_size), axis=-1),
                [1, num_point]),
        [total_points, 1])  # (B*N,1)
    pixel_indices = tf.concat(
        [batch_ids, tf.reshape(pts2d, [total_points, 2])],
        axis=-1)  # (B*N,3)
    # Swap the two pixel columns: the image's shape is (y,x).
    pixel_indices = tf.gather(pixel_indices, [0, 2, 1], axis=-1)
    img_full_seg = tf.reshape(
        tf.gather_nd(img_full_seg, pixel_indices),  # (B*N,C)
        [self.batch_size, num_point, -1])  # (B,N,C)

    img_seg_npoints = 256

    # Segmentation label id and pooling size for each supported class.
    class_settings = {
        'Car': (1, [5.0, 1.7, 5.0]),
        'Pedestrian': (2, [1.2, 1.8, 1.2]),
        'Cyclist': (3, [1.8, 1.8, 1.8]),
    }
    target_cls = self.cls_list[0]
    if target_cls not in class_settings:
        # Previously this fell through and failed later with an unbound
        # `cls_int`; fail early with a clear message instead.
        raise ValueError(
            'Unsupported class for image-seg masking: %r' % (target_cls,))
    cls_int, pooling_size = class_settings[target_cls]

    mask = tf.equal(img_full_seg, cls_int)
    mask = tf.reshape(mask, [self.batch_size, num_point])
    img_seg_masked, seg_point_indices = tf_gather_object_pc(
        img_full_seg, mask, npoints=img_seg_npoints)
    img_seg_masked.set_shape([batch_size, img_seg_npoints, 1])
    img_seg_point_cloud = tf.gather_nd(l0_xyz, seg_point_indices)
    img_seg_point_cloud.set_shape([batch_size, img_seg_npoints, 3])

    img_input = tf.image.resize_images(
        img_input, [360, 1200],
        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
        align_corners=True)

    xyz_list, feature_list, fps_idx_list = [l0_xyz], [l0_points], [None]
    point_seg_net = None  # stays None until the vote layer is reached
    for layer in self.layers:
        if layer.layer_type == 'Vote_Layer':
            # Propagate features from level 4 back down to the input points
            # (4 -> 2 -> 1 -> 0) so the masked points get a 256-d feature.
            l3_points = pointnet_fp_module(
                xyz_list[2], xyz_list[4], feature_list[2], feature_list[4],
                [256], layer.is_training, bn_decay, scope='fa_layer1')
            l2_points = pointnet_fp_module(
                xyz_list[1], xyz_list[2], feature_list[1], l3_points,
                [256], layer.is_training, bn_decay, scope='fa_layer2')
            l1_points = pointnet_fp_module(
                xyz_list[0], xyz_list[1], feature_list[0], l2_points,
                [256], layer.is_training, bn_decay, scope='fa_layer3')
            # (A per-point segmentation head on l1_points was disabled here.)
            point_seg_net = tf.gather_nd(l1_points, seg_point_indices)
            point_seg_net.set_shape([batch_size, img_seg_npoints, 256])

        xyz_list, feature_list, fps_idx_list = layer.build_layer(
            xyz_list, feature_list, fps_idx_list, bn_decay, self.output,
            self.placeholders[maps_dict.PL_CALIB_P2], img_input,
            img_seg_point_cloud, point_seg_net, pooling_size)

    # Heads append to self.output; remember where their entries start so
    # they can be merged afterwards.
    cur_head_start_idx = len(self.output[maps_dict.KEY_OUTPUT_XYZ])
    for head in self.heads:
        head.build_layer(xyz_list, feature_list, bn_decay, self.output)
    merge_head_prediction(cur_head_start_idx, self.output, self.prediction_keys)