def __init__(self, batch_size, is_training):
    """Create every builder object the single-stage detector needs.

    Args:
        batch_size: stored on the instance and handed to the placeholder
            and head builders.
        is_training: stored on the instance and handed to the head and
            layer builders.
    """
    self.batch_size = batch_size
    self.is_training = is_training

    # placeholders
    self.placeholders_builder = PlaceHolders(self.batch_size)
    self.placeholders_builder.get_placeholders()
    self.placeholders = self.placeholders_builder.placeholders

    # class name <-> class index lookup tables (indices start at 1)
    self.cls_list = cfg.DATASET.KITTI.CLS_LIST
    self.cls2idx = {name: pos for pos, name in enumerate(self.cls_list, 1)}
    self.idx2cls = {pos: name for pos, name in enumerate(self.cls_list, 1)}

    # anchors, box encoder/decoder, post-processing and loss, all stage 0
    self.anchor_builder = Anchors(0, self.cls_list)
    self.encoder_decoder = EncoderDecoder(0)
    self.postprocessor = PostProcessor(0, len(self.cls_list))
    self.loss_builder = LossBuilder(0)
    self.corner_loss = cfg.MODEL.FIRST_STAGE.CORNER_LOSS

    # heads: the IoU loss is switched on as soon as one head is of type 'IoU'
    self.iou_loss = False
    self.heads = []
    for single_head_cfg in cfg.MODEL.NETWORK.FIRST_STAGE.HEAD:
        new_head = HeadBuilder(
            self.batch_size,
            self.anchor_builder.anchors_num,
            0,
            single_head_cfg,
            is_training,
        )
        self.heads.append(new_head)
        if new_head.layer_type == 'IoU':
            self.iou_loss = True

    # target assigner
    self.target_assigner = TargetAssigner(0)

    # backbone layers: the vote loss is switched on by any 'Vote_Layer'
    self.vote_loss = False
    layer_cfg = cfg.MODEL.NETWORK.FIRST_STAGE.ARCHITECTURE
    built_layers = []
    for layer_idx, _ in enumerate(layer_cfg):
        new_layer = LayerBuilder(layer_idx, self.is_training, layer_cfg)
        built_layers.append(new_layer)
        if new_layer.layer_type == 'Vote_Layer':
            self.vote_loss = True
    self.layers = built_layers

    self.attr_velo_loss = cfg.MODEL.FIRST_STAGE.PREDICT_ATTRIBUTE_AND_VELOCITY

    self.__init_dict()
class SingleStageDetector:
    """Single-stage detector whose points are paired with image features.

    The constructor instantiates the builders configured under
    ``cfg.MODEL.NETWORK.FIRST_STAGE``; ``model_forward`` then builds the
    network and either the training loss or the post-processed predictions,
    depending on ``is_training``.
    """

    def __init__(self, batch_size, is_training):
        """Create all builder objects.

        Args:
            batch_size: stored on the instance and passed to the placeholder
                and head builders.
            is_training: stored on the instance and passed to the head and
                layer builders.
        """
        self.batch_size = batch_size
        self.is_training = is_training

        # placeholders
        self.placeholders_builder = PlaceHolders(self.batch_size)
        self.placeholders_builder.get_placeholders()
        self.placeholders = self.placeholders_builder.placeholders

        # class name <-> class index lookup tables (indices start at 1)
        self.cls_list = cfg.DATASET.KITTI.CLS_LIST
        self.cls2idx = {cls: i for i, cls in enumerate(self.cls_list, 1)}
        self.idx2cls = {i: cls for i, cls in enumerate(self.cls_list, 1)}

        # anchor_builder
        self.anchor_builder = Anchors(0, self.cls_list)
        # encoder_decoder
        self.encoder_decoder = EncoderDecoder(0)
        # postprocessor
        self.postprocessor = PostProcessor(0, len(self.cls_list))
        # loss builder
        self.loss_builder = LossBuilder(0)
        self.corner_loss = cfg.MODEL.FIRST_STAGE.CORNER_LOSS

        # head builder; any head of type 'IoU' enables the IoU loss
        self.iou_loss = False
        self.heads = []
        for head_cfg in cfg.MODEL.NETWORK.FIRST_STAGE.HEAD:
            head = HeadBuilder(self.batch_size,
                               self.anchor_builder.anchors_num, 0, head_cfg,
                               is_training)
            self.heads.append(head)
            if head.layer_type == 'IoU':
                self.iou_loss = True

        # target assigner
        self.target_assigner = TargetAssigner(0)  # first stage

        # layer builder; any 'Vote_Layer' enables the vote loss
        self.vote_loss = False
        layer_cfg = cfg.MODEL.NETWORK.FIRST_STAGE.ARCHITECTURE
        layers = []
        for i, _ in enumerate(layer_cfg):
            layer = LayerBuilder(i, self.is_training, layer_cfg)
            layers.append(layer)
            if layer.layer_type == 'Vote_Layer':
                self.vote_loss = True
        self.layers = layers

        self.attr_velo_loss = cfg.MODEL.FIRST_STAGE.PREDICT_ATTRIBUTE_AND_VELOCITY

        self.__init_dict()

    def __init_dict(self):
        """Create the ``output`` and ``labels`` dicts, one empty list per key."""
        output_keys = (
            # sampled xyz/feature
            maps_dict.KEY_OUTPUT_XYZ,
            maps_dict.KEY_OUTPUT_FEATURE,
            # generated anchors
            maps_dict.KEY_ANCHORS_3D,
            # vote output
            maps_dict.PRED_VOTE_OFFSET,
            maps_dict.PRED_VOTE_BASE,
            # det output
            maps_dict.PRED_CLS,
            maps_dict.PRED_OFFSET,
            maps_dict.PRED_ANGLE_CLS,
            maps_dict.PRED_ANGLE_RES,
            maps_dict.CORNER_LOSS_PRED_BOXES_CORNERS,
            maps_dict.PRED_ATTRIBUTE,
            maps_dict.PRED_VELOCITY,
            # iou output
            maps_dict.PRED_IOU_3D_VALUE,
            # final result
            maps_dict.PRED_3D_BBOX,
            maps_dict.PRED_3D_SCORE,
            maps_dict.PRED_3D_CLS_CATEGORY,
            maps_dict.PRED_3D_ATTRIBUTE,
            maps_dict.PRED_3D_VELOCITY,
        )
        self.output = {key: [] for key in output_keys}
        self.prediction_keys = self.output.keys()

        label_keys = (
            maps_dict.GT_CLS,
            maps_dict.GT_OFFSET,
            maps_dict.GT_ANGLE_CLS,
            maps_dict.GT_ANGLE_RES,
            maps_dict.GT_ATTRIBUTE,
            maps_dict.GT_VELOCITY,
            maps_dict.GT_BOXES_ANCHORS_3D,
            maps_dict.GT_IOU_3D_VALUE,
            maps_dict.GT_PMASK,
            maps_dict.GT_NMASK,
            maps_dict.CORNER_LOSS_GT_BOXES_CORNERS,
        )
        self.labels = {key: [] for key in label_keys}

    def build_img_extractor(self, img_input):
        """Build the image branch and return its 128-channel bottleneck.

        Args:
            img_input: image tensor handed to the extractor's
                ``preprocess_input`` together with the [360, 1200] size.

        Returns:
            The output of the 1x1 ``slim.conv2d`` ('bottleneck' scope, batch
            norm) applied to the extractor's feature maps.
        """
        self._img_pixel_size = np.asarray([360, 1200])
        VGG_config = namedtuple(
            'VGG_config',
            'vgg_conv1 vgg_conv2 vgg_conv3 vgg_conv4 l2_weight_decay')
        self._img_feature_extractor = ImgVggPyr(VGG_config(
            vgg_conv1=[2, 32],
            vgg_conv2=[2, 64],
            vgg_conv3=[3, 128],
            vgg_conv4=[3, 256],
            l2_weight_decay=0.0005,
        ))

        self._img_preprocessed = self._img_feature_extractor.preprocess_input(
            img_input, self._img_pixel_size)

        self.img_feature_maps, self.img_end_points = \
            self._img_feature_extractor.build(
                self._img_preprocessed, self._img_pixel_size,
                self.is_training)

        self.img_bottleneck = slim.conv2d(
            self.img_feature_maps,
            128, [1, 1],
            scope='bottleneck',
            normalizer_fn=slim.batch_norm,
            normalizer_params={'is_training': self.is_training})

        return self.img_bottleneck

    def _build_gather_indices(self, pts2d, num_point):
        """Return (B*N, 3) ``[batch, y, x]`` indices for ``tf.gather_nd``.

        ``pts2d`` is flattened sample by sample, so the batch column has to be
        ``0..0, 1..1, ...``. The range is therefore tiled along a second axis
        before flattening; tiling the 1-D range directly would give
        ``0, 1, .., B-1, 0, 1, ..`` and pair points with the wrong image
        whenever ``batch_size > 1``.
        """
        flat_len = self.batch_size * num_point
        batch_idx = tf.reshape(
            tf.tile(tf.expand_dims(tf.range(0, self.batch_size), axis=-1),
                    [1, num_point]),
            [flat_len, 1])  # (B*N, 1)
        indices = tf.concat(
            [batch_idx, tf.reshape(pts2d, [flat_len, 2])],
            axis=-1)  # (B*N, 3) as [batch, x, y]
        # image's shape is (y,x)
        return tf.gather(indices, [0, 2, 1], axis=-1)

    def _optional_placeholder(self, key):
        """Return ``self.placeholders[key]`` or None when the key is absent."""
        if key in self.placeholders:
            return self.placeholders[key]
        return None

    def network_forward(self, point_cloud, bn_decay, img_input):
        """Build the image branch, the backbone layers and the heads.

        Args:
            point_cloud: tensor whose last axis holds xyz in the first three
                channels followed by the point features.
            bn_decay: forwarded to every layer and head builder.
            img_input: image tensor for ``build_img_extractor``.
        """
        l0_xyz = tf.slice(point_cloud, [0, 0, 0], [-1, -1, 3])
        l0_points = tf.slice(point_cloud, [0, 0, 3], [-1, -1, -1])
        num_point = l0_xyz.get_shape().as_list()[1]

        img_feature_maps = self.build_img_extractor(img_input)

        # Project every point into the image and look up its feature vector.
        # NOTE(review): the pixel coordinates are not clipped to the feature
        # map extent here - confirm that inputs are filtered upstream.
        pts2d = projection.tf_rect_to_image(
            l0_xyz, self.placeholders[maps_dict.PL_CALIB_P2])
        pts2d = tf.cast(pts2d, tf.int32)  # (B,N,2)
        indices = self._build_gather_indices(pts2d, num_point)
        point_img_feats = tf.reshape(
            tf.gather_nd(img_feature_maps, indices),  # (B*N,C)
            [self.batch_size, num_point, -1])  # (B,N,C)

        xyz_list, feature_list = [l0_xyz], [l0_points]
        fps_idx_list, point_img_feats_list = [None], [point_img_feats]

        for layer in self.layers:
            (xyz_list, feature_list, fps_idx_list,
             point_img_feats_list) = layer.build_layer(
                 xyz_list, feature_list, fps_idx_list, bn_decay, self.output,
                 point_img_feats_list)

        cur_head_start_idx = len(self.output[maps_dict.KEY_OUTPUT_XYZ])
        for head in self.heads:
            head.build_layer(xyz_list, feature_list, bn_decay, self.output)
        merge_head_prediction(cur_head_start_idx, self.output,
                              self.prediction_keys)

    def model_forward(self, bn_decay=None):
        """Build the whole graph: network, anchors, then loss or predictions.

        Args:
            bn_decay: forwarded to ``network_forward``.
        """
        points_input_det = self.placeholders[maps_dict.PL_POINTS_INPUT]
        img_input_det = self.placeholders[maps_dict.PL_IMG_INPUT]

        # forward the point cloud
        self.network_forward(points_input_det, bn_decay, img_input_det)

        # generate anchors
        base_xyz = self.output[maps_dict.KEY_OUTPUT_XYZ][-1]
        anchors = self.anchor_builder.generate(base_xyz)  # [bs, pts_num, 1/cls_num, 7]
        self.output[maps_dict.KEY_ANCHORS_3D].append(anchors)

        if self.is_training:  # training mode
            self.train_forward(-1, anchors)
        else:  # testing mode
            self.test_forward(-1, anchors)

    def train_forward(self, index, anchors):
        """Assign targets, record labels and build the loss.

        Args:
            index: position in the ``self.output`` lists to train on.
            anchors: anchors generated for the points at ``index``.
        """
        base_xyz = self.output[maps_dict.KEY_OUTPUT_XYZ][index]
        pred_offset = self.output[maps_dict.PRED_OFFSET][index]
        pred_angle_res = self.output[maps_dict.PRED_ANGLE_RES][index]

        gt_boxes_3d = self.placeholders[maps_dict.PL_LABEL_BOXES_3D]
        gt_classes = self.placeholders[maps_dict.PL_LABEL_CLASSES]
        gt_angle_cls = self.placeholders[maps_dict.PL_ANGLE_CLS]
        gt_angle_res = self.placeholders[maps_dict.PL_ANGLE_RESIDUAL]
        # attribute / velocity labels are optional placeholders
        gt_attributes = self._optional_placeholder(
            maps_dict.PL_LABEL_ATTRIBUTES)
        gt_velocity = self._optional_placeholder(maps_dict.PL_LABEL_VELOCITY)

        # The assigner's index and angle targets are not used: the angle
        # targets are recomputed by the encoder right below.
        (_, assigned_pmask, assigned_nmask, assigned_gt_boxes_3d,
         assigned_gt_labels, _, _, assigned_gt_velocity,
         assigned_gt_attribute) = self.target_assigner.assign(
             base_xyz, anchors, gt_boxes_3d, gt_classes, gt_angle_cls,
             gt_angle_res, gt_velocity, gt_attributes)

        # encode offset
        (assigned_gt_offset, assigned_gt_angle_cls,
         assigned_gt_angle_res) = self.encoder_decoder.encode(
             base_xyz, assigned_gt_boxes_3d, anchors)

        # corner_loss: decode with the ground-truth angle class (one-hot)
        corner_loss_angle_cls = tf.cast(
            tf.one_hot(assigned_gt_angle_cls,
                       depth=cfg.MODEL.ANGLE_CLS_NUM,
                       on_value=1,
                       off_value=0,
                       axis=-1), tf.float32)  # bs, pts_num, cls_num, -1
        pred_anchors_3d = self.encoder_decoder.decode(
            base_xyz, pred_offset, corner_loss_angle_cls, pred_angle_res,
            self.is_training, anchors)  # [bs, points_num, cls_num, 7]
        pred_corners = transfer_box3d_to_corners(
            pred_anchors_3d)  # [bs, points_num, cls_num, 8, 3]
        gt_corners = transfer_box3d_to_corners(
            assigned_gt_boxes_3d)  # [bs, points_num, cls_num,8,3]
        self.output[maps_dict.CORNER_LOSS_PRED_BOXES_CORNERS].append(
            pred_corners)
        self.labels[maps_dict.CORNER_LOSS_GT_BOXES_CORNERS].append(gt_corners)

        self.labels[maps_dict.GT_CLS].append(assigned_gt_labels)
        self.labels[maps_dict.GT_BOXES_ANCHORS_3D].append(assigned_gt_boxes_3d)
        self.labels[maps_dict.GT_OFFSET].append(assigned_gt_offset)
        self.labels[maps_dict.GT_ANGLE_CLS].append(assigned_gt_angle_cls)
        self.labels[maps_dict.GT_ANGLE_RES].append(assigned_gt_angle_res)
        self.labels[maps_dict.GT_ATTRIBUTE].append(assigned_gt_attribute)
        self.labels[maps_dict.GT_VELOCITY].append(assigned_gt_velocity)
        self.labels[maps_dict.GT_PMASK].append(assigned_pmask)
        self.labels[maps_dict.GT_NMASK].append(assigned_nmask)

        self.loss_builder.forward(index, self.labels, self.output,
                                  self.placeholders, self.corner_loss,
                                  self.vote_loss, self.attr_velo_loss,
                                  self.iou_loss)

    def test_forward(self, index, anchors):
        """Decode boxes and scores and hand them to the post-processor.

        Args:
            index: position in the ``self.output`` lists to decode.
            anchors: anchors generated for the points at ``index``.
        """
        base_xyz = self.output[maps_dict.KEY_OUTPUT_XYZ][index]

        pred_cls = self.output[maps_dict.PRED_CLS][index]  # [bs, points_num, cls_num + 1/0]
        pred_offset = self.output[maps_dict.PRED_OFFSET][index]
        pred_angle_cls = self.output[maps_dict.PRED_ANGLE_CLS][index]
        pred_angle_res = self.output[maps_dict.PRED_ANGLE_RES][index]

        # decode predictions
        pred_anchors_3d = self.encoder_decoder.decode(
            base_xyz, pred_offset, pred_angle_cls, pred_angle_res,
            self.is_training, anchors)  # [bs, points_num, cls_num, 7]

        # decode classification
        if cfg.MODEL.FIRST_STAGE.CLS_ACTIVATION == 'Softmax':
            # softmax; channel 0 is sliced away before scoring
            pred_score = tf.nn.softmax(pred_cls)
            pred_score = tf.slice(pred_score, [0, 0, 1], [-1, -1, -1])
        else:  # sigmoid
            pred_score = tf.nn.sigmoid(pred_cls)

        # using IoU branch proposed by sparse-to-dense
        if self.iou_loss:
            pred_iou = self.output[maps_dict.PRED_IOU_3D_VALUE][index]
            pred_score = pred_score * pred_iou

        # attribute / velocity predictions only exist when a head produced them
        attribute_preds = self.output[maps_dict.PRED_ATTRIBUTE]
        pred_attribute = attribute_preds[index] if attribute_preds else None

        velocity_preds = self.output[maps_dict.PRED_VELOCITY]
        pred_velocity = velocity_preds[index] if velocity_preds else None

        self.postprocessor.forward(pred_anchors_3d, pred_score, self.output,
                                   pred_attribute, pred_velocity)
class SingleStageDetector:
    """Single-stage detector guided by a full-image segmentation map.

    The constructor instantiates the builders configured under
    ``cfg.MODEL.NETWORK.FIRST_STAGE``; ``model_forward`` then builds the
    network and either the training loss or the post-processed predictions,
    depending on ``is_training``.
    """

    # Image size used to reshape the segmentation map and resize the image.
    _IMG_HEIGHT = 360
    _IMG_WIDTH = 1200
    # Number of points kept by tf_gather_object_pc for the selected class.
    _IMG_SEG_NPOINTS = 256
    # First configured class -> (label value in the segmentation map,
    # pooling size forwarded to the layers).
    _SEG_CLASS_SPECS = {
        'Car': (1, (5.0, 1.7, 5.0)),
        'Pedestrian': (2, (1.2, 1.8, 1.2)),
        'Cyclist': (3, (1.8, 1.8, 1.8)),
    }

    def __init__(self, batch_size, is_training):
        """Create all builder objects.

        Args:
            batch_size: stored on the instance and passed to the placeholder
                and head builders.
            is_training: stored on the instance and passed to the head and
                layer builders.
        """
        self.batch_size = batch_size
        self.is_training = is_training

        # placeholders
        self.placeholders_builder = PlaceHolders(self.batch_size)
        self.placeholders_builder.get_placeholders()
        self.placeholders = self.placeholders_builder.placeholders

        # class name <-> class index lookup tables (indices start at 1)
        self.cls_list = cfg.DATASET.KITTI.CLS_LIST
        self.cls2idx = {cls: i for i, cls in enumerate(self.cls_list, 1)}
        self.idx2cls = {i: cls for i, cls in enumerate(self.cls_list, 1)}

        # anchor_builder
        self.anchor_builder = Anchors(0, self.cls_list)
        # encoder_decoder
        self.encoder_decoder = EncoderDecoder(0)
        # postprocessor
        self.postprocessor = PostProcessor(0, len(self.cls_list))
        # loss builder
        self.loss_builder = LossBuilder(0)
        self.corner_loss = cfg.MODEL.FIRST_STAGE.CORNER_LOSS

        # head builder; any head of type 'IoU' enables the IoU loss
        self.iou_loss = False
        self.heads = []
        for head_cfg in cfg.MODEL.NETWORK.FIRST_STAGE.HEAD:
            head = HeadBuilder(self.batch_size,
                               self.anchor_builder.anchors_num, 0, head_cfg,
                               is_training)
            self.heads.append(head)
            if head.layer_type == 'IoU':
                self.iou_loss = True

        # target assigner
        self.target_assigner = TargetAssigner(0)  # first stage

        # layer builder; any 'Vote_Layer' enables the vote loss
        self.vote_loss = False
        layer_cfg = cfg.MODEL.NETWORK.FIRST_STAGE.ARCHITECTURE
        layers = []
        for i, _ in enumerate(layer_cfg):
            layer = LayerBuilder(i, self.is_training, layer_cfg)
            layers.append(layer)
            if layer.layer_type == 'Vote_Layer':
                self.vote_loss = True
        self.layers = layers

        self.attr_velo_loss = cfg.MODEL.FIRST_STAGE.PREDICT_ATTRIBUTE_AND_VELOCITY

        self.__init_dict()

    def __init_dict(self):
        """Create the ``output`` and ``labels`` dicts, one empty list per key."""
        output_keys = (
            # sampled xyz/feature
            maps_dict.KEY_OUTPUT_XYZ,
            maps_dict.KEY_OUTPUT_FEATURE,
            # generated anchors
            maps_dict.KEY_ANCHORS_3D,
            # vote output
            maps_dict.PRED_VOTE_OFFSET,
            maps_dict.PRED_VOTE_BASE,
            # det output
            maps_dict.PRED_CLS,
            maps_dict.PRED_OFFSET,
            maps_dict.PRED_ANGLE_CLS,
            maps_dict.PRED_ANGLE_RES,
            maps_dict.CORNER_LOSS_PRED_BOXES_CORNERS,
            maps_dict.PRED_ATTRIBUTE,
            maps_dict.PRED_VELOCITY,
            # iou output
            maps_dict.PRED_IOU_3D_VALUE,
            # final result
            maps_dict.PRED_3D_BBOX,
            maps_dict.PRED_3D_SCORE,
            maps_dict.PRED_3D_CLS_CATEGORY,
            maps_dict.PRED_3D_ATTRIBUTE,
            maps_dict.PRED_3D_VELOCITY,
            maps_dict.PRED_POINT_SEG,
        )
        self.output = {key: [] for key in output_keys}
        self.prediction_keys = self.output.keys()

        label_keys = (
            maps_dict.GT_CLS,
            maps_dict.GT_OFFSET,
            maps_dict.GT_ANGLE_CLS,
            maps_dict.GT_ANGLE_RES,
            maps_dict.GT_ATTRIBUTE,
            maps_dict.GT_VELOCITY,
            maps_dict.GT_BOXES_ANCHORS_3D,
            maps_dict.GT_IOU_3D_VALUE,
            maps_dict.GT_PMASK,
            maps_dict.GT_NMASK,
            maps_dict.CORNER_LOSS_GT_BOXES_CORNERS,
            maps_dict.PL_LABEL_SEMSEGS,
        )
        self.labels = {key: [] for key in label_keys}

    def _build_gather_indices(self, pts2d, num_point):
        """Return (B*N, 3) ``[batch, y, x]`` indices for ``tf.gather_nd``.

        ``pts2d`` is flattened sample by sample, so the batch column has to be
        ``0..0, 1..1, ...``. The range is therefore tiled along a second axis
        before flattening; tiling the 1-D range directly would give
        ``0, 1, .., B-1, 0, 1, ..`` and pair points with the wrong image
        whenever ``batch_size > 1``.
        """
        flat_len = self.batch_size * num_point
        batch_idx = tf.reshape(
            tf.tile(tf.expand_dims(tf.range(0, self.batch_size), axis=-1),
                    [1, num_point]),
            [flat_len, 1])  # (B*N, 1)
        indices = tf.concat(
            [batch_idx, tf.reshape(pts2d, [flat_len, 2])],
            axis=-1)  # (B*N, 3) as [batch, x, y]
        # image's shape is (y,x)
        return tf.gather(indices, [0, 2, 1], axis=-1)

    def _optional_placeholder(self, key):
        """Return ``self.placeholders[key]`` or None when the key is absent."""
        if key in self.placeholders:
            return self.placeholders[key]
        return None

    def network_forward(self, point_cloud, bn_decay, img_input, img_full_seg):
        """Build the backbone layers and heads with segmentation guidance.

        Args:
            point_cloud: tensor whose last axis holds xyz in the first three
                channels followed by the point features.
            bn_decay: forwarded to every layer and head builder.
            img_input: image tensor; resized to 360x1200 and passed to the
                layers.
            img_full_seg: segmentation map, reshaped to (B, 360, 1200, 1).

        Raises:
            ValueError: if ``self.cls_list[0]`` is not 'Car', 'Pedestrian' or
                'Cyclist' (previously this surfaced as an unbound-local
                error on ``cls_int``).
        """
        primary_cls = self.cls_list[0]
        if primary_cls not in self._SEG_CLASS_SPECS:
            raise ValueError(
                'Unsupported class %r; expected one of %s' %
                (primary_cls, sorted(self._SEG_CLASS_SPECS)))
        cls_int, pooling_size = self._SEG_CLASS_SPECS[primary_cls]
        pooling_size = list(pooling_size)

        l0_xyz = tf.slice(point_cloud, [0, 0, 0], [-1, -1, 3])
        l0_points = tf.slice(point_cloud, [0, 0, 3], [-1, -1, -1])
        static_shape = l0_xyz.get_shape().as_list()
        batch_size, num_point = static_shape[0], static_shape[1]
        img_seg_npoints = self._IMG_SEG_NPOINTS

        img_full_seg = tf.reshape(
            img_full_seg, [batch_size, self._IMG_HEIGHT, self._IMG_WIDTH, 1])

        # Project every point into the image and read its segmentation label.
        # NOTE(review): the pixel coordinates are not clipped to the image
        # extent here - confirm that inputs are filtered upstream.
        pts2d = projection.tf_rect_to_image(
            l0_xyz, self.placeholders[maps_dict.PL_CALIB_P2])
        pts2d = tf.cast(pts2d, tf.int32)  # (B,N,2)
        pixel_indices = self._build_gather_indices(pts2d, num_point)
        img_full_seg = tf.reshape(
            tf.gather_nd(img_full_seg, pixel_indices),  # (B*N,C)
            [self.batch_size, num_point, -1])  # (B,N,C)

        # Select the points whose label matches the configured class.
        mask = tf.equal(img_full_seg, cls_int)
        mask = tf.reshape(mask, [self.batch_size, num_point])
        img_seg_masked, seg_indices = tf_gather_object_pc(
            img_full_seg, mask, npoints=img_seg_npoints)
        img_seg_masked.set_shape([batch_size, img_seg_npoints, 1])
        img_seg_point_cloud = tf.gather_nd(l0_xyz, seg_indices)
        img_seg_point_cloud.set_shape([batch_size, img_seg_npoints, 3])

        img_input = tf.image.resize_images(
            img_input, [self._IMG_HEIGHT, self._IMG_WIDTH],
            method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
            align_corners=True)

        xyz_list, feature_list, fps_idx_list = [l0_xyz], [l0_points], [None]
        point_seg_net = None
        for layer in self.layers:
            if layer.layer_type == 'Vote_Layer':
                # Propagate features back to the input points before the vote
                # layer, then keep only the segmentation-selected points.
                l3_points = pointnet_fp_module(
                    xyz_list[2], xyz_list[4], feature_list[2],
                    feature_list[4], [256], layer.is_training, bn_decay,
                    scope='fa_layer1')
                l2_points = pointnet_fp_module(
                    xyz_list[1], xyz_list[2], feature_list[1], l3_points,
                    [256], layer.is_training, bn_decay, scope='fa_layer2')
                l1_points = pointnet_fp_module(
                    xyz_list[0], xyz_list[1], feature_list[0], l2_points,
                    [256], layer.is_training, bn_decay, scope='fa_layer3')
                # Disabled per-point segmentation head, kept for reference:
                # net = tf_util.conv1d(l1_points, 128, 1, padding='VALID', bn=True,
                #                      is_training=layer.is_training, scope='img-seg-conv1d-fc1', bn_decay=bn_decay)
                # net = tf_util.dropout(net, keep_prob=0.7, is_training=layer.is_training, scope='img-seg-dp1')
                # logits = tf_util.conv1d(net, 2, 1, padding='VALID', activation_fn=None, scope='img-seg-conv1d-fc2')
                # self.output[maps_dict.PRED_POINT_SEG].append(logits)
                point_seg_net = tf.gather_nd(l1_points, seg_indices)
                point_seg_net.set_shape([batch_size, img_seg_npoints, 256])

            xyz_list, feature_list, fps_idx_list = layer.build_layer(
                xyz_list, feature_list, fps_idx_list, bn_decay, self.output,
                self.placeholders[maps_dict.PL_CALIB_P2], img_input,
                img_seg_point_cloud, point_seg_net, pooling_size)

        cur_head_start_idx = len(self.output[maps_dict.KEY_OUTPUT_XYZ])
        for head in self.heads:
            head.build_layer(xyz_list, feature_list, bn_decay, self.output)
        merge_head_prediction(cur_head_start_idx, self.output,
                              self.prediction_keys)

    def model_forward(self, bn_decay=None):
        """Build the whole graph: network, anchors, then loss or predictions.

        Args:
            bn_decay: forwarded to ``network_forward``.
        """
        points_input_det = self.placeholders[maps_dict.PL_POINTS_INPUT]
        img_input_det = self.placeholders[maps_dict.PL_IMG_INPUT]
        img_full_seg = self.placeholders[maps_dict.PL_IMG_FULL_SEG_INPUT]

        # forward the point cloud
        self.network_forward(points_input_det, bn_decay, img_input_det,
                             img_full_seg)

        # generate anchors
        base_xyz = self.output[maps_dict.KEY_OUTPUT_XYZ][-1]
        anchors = self.anchor_builder.generate(
            base_xyz)  # [bs, pts_num, 1/cls_num, 7]
        self.output[maps_dict.KEY_ANCHORS_3D].append(anchors)

        if self.is_training:  # training mode
            self.train_forward(-1, anchors)
        else:  # testing mode
            self.test_forward(-1, anchors)

    def train_forward(self, index, anchors):
        """Assign targets, record labels and build the loss.

        Args:
            index: position in the ``self.output`` lists to train on.
            anchors: anchors generated for the points at ``index``.
        """
        base_xyz = self.output[maps_dict.KEY_OUTPUT_XYZ][index]
        pred_offset = self.output[maps_dict.PRED_OFFSET][index]
        pred_angle_res = self.output[maps_dict.PRED_ANGLE_RES][index]

        gt_boxes_3d = self.placeholders[maps_dict.PL_LABEL_BOXES_3D]
        gt_classes = self.placeholders[maps_dict.PL_LABEL_CLASSES]
        gt_angle_cls = self.placeholders[maps_dict.PL_ANGLE_CLS]
        gt_angle_res = self.placeholders[maps_dict.PL_ANGLE_RESIDUAL]
        # point_sem_labels = self.placeholders[maps_dict.PL_LABEL_SEMSEGS]
        # attribute / velocity labels are optional placeholders
        gt_attributes = self._optional_placeholder(
            maps_dict.PL_LABEL_ATTRIBUTES)
        gt_velocity = self._optional_placeholder(maps_dict.PL_LABEL_VELOCITY)

        # The assigner's index and angle targets are not used: the angle
        # targets are recomputed by the encoder right below.
        (_, assigned_pmask, assigned_nmask, assigned_gt_boxes_3d,
         assigned_gt_labels, _, _, assigned_gt_velocity,
         assigned_gt_attribute) = self.target_assigner.assign(
             base_xyz, anchors, gt_boxes_3d, gt_classes, gt_angle_cls,
             gt_angle_res, gt_velocity, gt_attributes)

        # encode offset
        (assigned_gt_offset, assigned_gt_angle_cls,
         assigned_gt_angle_res) = self.encoder_decoder.encode(
             base_xyz, assigned_gt_boxes_3d, anchors)

        # corner_loss: decode with the ground-truth angle class (one-hot)
        corner_loss_angle_cls = tf.cast(
            tf.one_hot(assigned_gt_angle_cls,
                       depth=cfg.MODEL.ANGLE_CLS_NUM,
                       on_value=1,
                       off_value=0,
                       axis=-1), tf.float32)  # bs, pts_num, cls_num, -1
        pred_anchors_3d = self.encoder_decoder.decode(
            base_xyz, pred_offset, corner_loss_angle_cls, pred_angle_res,
            self.is_training, anchors)  # [bs, points_num, cls_num, 7]
        pred_corners = transfer_box3d_to_corners(
            pred_anchors_3d)  # [bs, points_num, cls_num, 8, 3]
        gt_corners = transfer_box3d_to_corners(
            assigned_gt_boxes_3d)  # [bs, points_num, cls_num,8,3]
        self.output[maps_dict.CORNER_LOSS_PRED_BOXES_CORNERS].append(
            pred_corners)
        self.labels[maps_dict.CORNER_LOSS_GT_BOXES_CORNERS].append(gt_corners)

        self.labels[maps_dict.GT_CLS].append(assigned_gt_labels)
        self.labels[maps_dict.GT_BOXES_ANCHORS_3D].append(assigned_gt_boxes_3d)
        self.labels[maps_dict.GT_OFFSET].append(assigned_gt_offset)
        self.labels[maps_dict.GT_ANGLE_CLS].append(assigned_gt_angle_cls)
        self.labels[maps_dict.GT_ANGLE_RES].append(assigned_gt_angle_res)
        self.labels[maps_dict.GT_ATTRIBUTE].append(assigned_gt_attribute)
        self.labels[maps_dict.GT_VELOCITY].append(assigned_gt_velocity)
        self.labels[maps_dict.GT_PMASK].append(assigned_pmask)
        self.labels[maps_dict.GT_NMASK].append(assigned_nmask)
        # self.labels[maps_dict.PL_LABEL_SEMSEGS].append(point_sem_labels)

        self.loss_builder.forward(index, self.labels, self.output,
                                  self.placeholders, self.corner_loss,
                                  self.vote_loss, self.attr_velo_loss,
                                  self.iou_loss)

    def test_forward(self, index, anchors):
        """Decode boxes and scores and hand them to the post-processor.

        Args:
            index: position in the ``self.output`` lists to decode.
            anchors: anchors generated for the points at ``index``.
        """
        base_xyz = self.output[maps_dict.KEY_OUTPUT_XYZ][index]

        pred_cls = self.output[maps_dict.PRED_CLS][
            index]  # [bs, points_num, cls_num + 1/0]
        pred_offset = self.output[maps_dict.PRED_OFFSET][index]
        pred_angle_cls = self.output[maps_dict.PRED_ANGLE_CLS][index]
        pred_angle_res = self.output[maps_dict.PRED_ANGLE_RES][index]

        # decode predictions
        pred_anchors_3d = self.encoder_decoder.decode(
            base_xyz, pred_offset, pred_angle_cls, pred_angle_res,
            self.is_training, anchors)  # [bs, points_num, cls_num, 7]

        # decode classification
        if cfg.MODEL.FIRST_STAGE.CLS_ACTIVATION == 'Softmax':
            # softmax; channel 0 is sliced away before scoring
            pred_score = tf.nn.softmax(pred_cls)
            pred_score = tf.slice(pred_score, [0, 0, 1], [-1, -1, -1])
        else:  # sigmoid
            pred_score = tf.nn.sigmoid(pred_cls)

        # using IoU branch proposed by sparse-to-dense
        if self.iou_loss:
            pred_iou = self.output[maps_dict.PRED_IOU_3D_VALUE][index]
            pred_score = pred_score * pred_iou

        # attribute / velocity predictions only exist when a head produced them
        attribute_preds = self.output[maps_dict.PRED_ATTRIBUTE]
        pred_attribute = attribute_preds[index] if attribute_preds else None

        velocity_preds = self.output[maps_dict.PRED_VELOCITY]
        pred_velocity = velocity_preds[index] if velocity_preds else None

        self.postprocessor.forward(pred_anchors_3d, pred_score, self.output,
                                   pred_attribute, pred_velocity)
class SingleStageDetector:
    """Point-cloud-only single-stage detector.

    The constructor instantiates the builders configured under
    ``cfg.MODEL.NETWORK.FIRST_STAGE``; ``model_forward`` then builds the
    network and either the training loss or the post-processed predictions,
    depending on ``is_training``.
    """

    def __init__(self, batch_size, is_training):
        """Create every builder object the detector needs.

        Args:
            batch_size: stored on the instance and handed to the placeholder
                and head builders.
            is_training: stored on the instance and handed to the head and
                layer builders.
        """
        self.batch_size = batch_size
        self.is_training = is_training

        # placeholders
        self.placeholders_builder = PlaceHolders(self.batch_size)
        self.placeholders_builder.get_placeholders()
        self.placeholders = self.placeholders_builder.placeholders

        # class name <-> class index lookup tables (indices start at 1)
        self.cls_list = cfg.DATASET.KITTI.CLS_LIST
        self.cls2idx = {name: pos for pos, name in enumerate(self.cls_list, 1)}
        self.idx2cls = {pos: name for pos, name in enumerate(self.cls_list, 1)}

        # anchors, box encoder/decoder, post-processing and loss, all stage 0
        self.anchor_builder = Anchors(0, self.cls_list)
        self.encoder_decoder = EncoderDecoder(0)
        self.postprocessor = PostProcessor(0, len(self.cls_list))
        self.loss_builder = LossBuilder(0)
        self.corner_loss = cfg.MODEL.FIRST_STAGE.CORNER_LOSS

        # heads: the IoU loss is switched on by any head of type 'IoU'
        self.iou_loss = False
        self.heads = []
        for single_head_cfg in cfg.MODEL.NETWORK.FIRST_STAGE.HEAD:
            new_head = HeadBuilder(
                self.batch_size,
                self.anchor_builder.anchors_num,
                0,
                single_head_cfg,
                is_training,
            )
            self.heads.append(new_head)
            if new_head.layer_type == 'IoU':
                self.iou_loss = True

        # target assigner
        self.target_assigner = TargetAssigner(0)

        # backbone layers: the vote loss is switched on by any 'Vote_Layer'
        self.vote_loss = False
        layer_cfg = cfg.MODEL.NETWORK.FIRST_STAGE.ARCHITECTURE
        built_layers = []
        for layer_idx, _ in enumerate(layer_cfg):
            new_layer = LayerBuilder(layer_idx, self.is_training, layer_cfg)
            built_layers.append(new_layer)
            if new_layer.layer_type == 'Vote_Layer':
                self.vote_loss = True
        self.layers = built_layers

        self.attr_velo_loss = cfg.MODEL.FIRST_STAGE.PREDICT_ATTRIBUTE_AND_VELOCITY

        self.__init_dict()

    def __init_dict(self):
        """Initialise ``output`` and ``labels`` with an empty list per key."""
        prediction_slots = (
            # sampled xyz/feature
            maps_dict.KEY_OUTPUT_XYZ,
            maps_dict.KEY_OUTPUT_FEATURE,
            # generated anchors
            maps_dict.KEY_ANCHORS_3D,
            # vote output
            maps_dict.PRED_VOTE_OFFSET,
            maps_dict.PRED_VOTE_BASE,
            # det output
            maps_dict.PRED_CLS,
            maps_dict.PRED_OFFSET,
            maps_dict.PRED_ANGLE_CLS,
            maps_dict.PRED_ANGLE_RES,
            maps_dict.CORNER_LOSS_PRED_BOXES_CORNERS,
            maps_dict.PRED_ATTRIBUTE,
            maps_dict.PRED_VELOCITY,
            # iou output
            maps_dict.PRED_IOU_3D_VALUE,
            # final result
            maps_dict.PRED_3D_BBOX,
            maps_dict.PRED_3D_SCORE,
            maps_dict.PRED_3D_CLS_CATEGORY,
            maps_dict.PRED_3D_ATTRIBUTE,
            maps_dict.PRED_3D_VELOCITY,
        )
        self.output = {slot: [] for slot in prediction_slots}
        self.prediction_keys = self.output.keys()

        label_slots = (
            maps_dict.GT_CLS,
            maps_dict.GT_OFFSET,
            maps_dict.GT_ANGLE_CLS,
            maps_dict.GT_ANGLE_RES,
            maps_dict.GT_ATTRIBUTE,
            maps_dict.GT_VELOCITY,
            maps_dict.GT_BOXES_ANCHORS_3D,
            maps_dict.GT_IOU_3D_VALUE,
            maps_dict.GT_PMASK,
            maps_dict.GT_NMASK,
            maps_dict.CORNER_LOSS_GT_BOXES_CORNERS,
        )
        self.labels = {slot: [] for slot in label_slots}

    def network_forward(self, point_cloud, bn_decay):
        """Run the backbone layers and then the heads over the point cloud.

        Args:
            point_cloud: tensor whose last axis holds xyz in the first three
                channels followed by the point features.
            bn_decay: forwarded to every layer and head builder.
        """
        coords = tf.slice(point_cloud, [0, 0, 0], [-1, -1, 3])
        feats = tf.slice(point_cloud, [0, 0, 3], [-1, -1, -1])

        # Each layer receives the running lists and returns updated ones.
        state = ([coords], [feats], [None])
        for backbone_layer in self.layers:
            state = backbone_layer.build_layer(
                state[0], state[1], state[2], bn_decay, self.output)
            xyz_list, feature_list, fps_idx_list = state
            state = (xyz_list, feature_list, fps_idx_list)
        xyz_list, feature_list, _ = state

        # Heads append to self.output; entries from this index on get merged.
        first_head_slot = len(self.output[maps_dict.KEY_OUTPUT_XYZ])
        for det_head in self.heads:
            det_head.build_layer(xyz_list, feature_list, bn_decay, self.output)
        merge_head_prediction(first_head_slot, self.output,
                              self.prediction_keys)

    def model_forward(self, bn_decay=None):
        """Build network, anchors, and the train or test branch.

        Args:
            bn_decay: forwarded to ``network_forward``.
        """
        # forward the point cloud
        self.network_forward(self.placeholders[maps_dict.PL_POINTS_INPUT],
                             bn_decay)

        # generate anchors around the last set of output points
        anchors = self.anchor_builder.generate(
            self.output[maps_dict.KEY_OUTPUT_XYZ][-1])  # [bs, pts_num, 1/cls_num, 7]
        self.output[maps_dict.KEY_ANCHORS_3D].append(anchors)

        # training mode builds the loss, testing mode the final predictions
        run_branch = self.train_forward if self.is_training else self.test_forward
        run_branch(-1, anchors)

    def train_forward(self, index, anchors):
        """
        Calculating loss
        """
        preds, feeds = self.output, self.placeholders

        centers = preds[maps_dict.KEY_OUTPUT_XYZ][index]
        offset_pred = preds[maps_dict.PRED_OFFSET][index]
        # looked up as in the original flow; the value is not used below
        _ = preds[maps_dict.PRED_ANGLE_CLS][index]
        angle_res_pred = preds[maps_dict.PRED_ANGLE_RES][index]

        boxes_gt = feeds[maps_dict.PL_LABEL_BOXES_3D]
        classes_gt = feeds[maps_dict.PL_LABEL_CLASSES]
        angle_cls_gt = feeds[maps_dict.PL_ANGLE_CLS]
        angle_res_gt = feeds[maps_dict.PL_ANGLE_RESIDUAL]

        # attribute / velocity labels are optional placeholders
        attributes_gt = (feeds[maps_dict.PL_LABEL_ATTRIBUTES]
                         if maps_dict.PL_LABEL_ATTRIBUTES in feeds.keys()
                         else None)
        velocity_gt = (feeds[maps_dict.PL_LABEL_VELOCITY]
                       if maps_dict.PL_LABEL_VELOCITY in feeds.keys()
                       else None)

        assignment = self.target_assigner.assign(
            centers, anchors, boxes_gt, classes_gt, angle_cls_gt,
            angle_res_gt, velocity_gt, attributes_gt)
        # index and angle targets from the assigner are superseded below
        (_unused_idx, pos_mask, neg_mask, matched_boxes, matched_labels,
         _unused_angle_cls, _unused_angle_res, matched_velocity,
         matched_attribute) = assignment

        # encode offset
        offset_target, angle_cls_target, angle_res_target = \
            self.encoder_decoder.encode(centers, matched_boxes, anchors)

        # corner_loss: decode with the ground-truth angle class (one-hot)
        angle_cls_one_hot = tf.one_hot(
            angle_cls_target,
            depth=cfg.MODEL.ANGLE_CLS_NUM,
            on_value=1,
            off_value=0,
            axis=-1)
        angle_cls_one_hot = tf.cast(angle_cls_one_hot, tf.float32)  # bs, pts_num, cls_num, -1
        decoded_boxes = self.encoder_decoder.decode(
            centers, offset_pred, angle_cls_one_hot, angle_res_pred,
            self.is_training, anchors)  # [bs, points_num, cls_num, 7]
        corners_pred = transfer_box3d_to_corners(decoded_boxes)  # [bs, points_num, cls_num, 8, 3]
        corners_gt = transfer_box3d_to_corners(matched_boxes)  # [bs, points_num, cls_num,8,3]
        preds[maps_dict.CORNER_LOSS_PRED_BOXES_CORNERS].append(corners_pred)

        # record every target under its label key
        recorded_targets = (
            (maps_dict.CORNER_LOSS_GT_BOXES_CORNERS, corners_gt),
            (maps_dict.GT_CLS, matched_labels),
            (maps_dict.GT_BOXES_ANCHORS_3D, matched_boxes),
            (maps_dict.GT_OFFSET, offset_target),
            (maps_dict.GT_ANGLE_CLS, angle_cls_target),
            (maps_dict.GT_ANGLE_RES, angle_res_target),
            (maps_dict.GT_ATTRIBUTE, matched_attribute),
            (maps_dict.GT_VELOCITY, matched_velocity),
            (maps_dict.GT_PMASK, pos_mask),
            (maps_dict.GT_NMASK, neg_mask),
        )
        for label_key, target in recorded_targets:
            self.labels[label_key].append(target)

        self.loss_builder.forward(
            index, self.labels, preds, feeds, self.corner_loss,
            self.vote_loss, self.attr_velo_loss, self.iou_loss)

    def test_forward(self, index, anchors):
        """Decode boxes and scores and pass them to the post-processor.

        Args:
            index: position in the ``self.output`` lists to decode.
            anchors: anchors generated for the points at ``index``.
        """
        preds = self.output
        centers = preds[maps_dict.KEY_OUTPUT_XYZ][index]

        cls_logits = preds[maps_dict.PRED_CLS][index]  # [bs, points_num, cls_num + 1/0]
        offset_pred = preds[maps_dict.PRED_OFFSET][index]
        angle_cls_pred = preds[maps_dict.PRED_ANGLE_CLS][index]
        angle_res_pred = preds[maps_dict.PRED_ANGLE_RES][index]

        # decode predictions
        decoded_boxes = self.encoder_decoder.decode(
            centers, offset_pred, angle_cls_pred, angle_res_pred,
            self.is_training, anchors)  # [bs, points_num, cls_num, 7]

        # decode classification
        if cfg.MODEL.FIRST_STAGE.CLS_ACTIVATION != 'Softmax':
            # sigmoid
            scores = tf.nn.sigmoid(cls_logits)
        else:
            # softmax; channel 0 is sliced away before scoring
            scores = tf.slice(tf.nn.softmax(cls_logits),
                              [0, 0, 1], [-1, -1, -1])

        # using IoU branch proposed by sparse-to-dense
        if self.iou_loss:
            scores = scores * preds[maps_dict.PRED_IOU_3D_VALUE][index]

        # attribute / velocity predictions only exist when a head produced them
        attribute_list = preds[maps_dict.PRED_ATTRIBUTE]
        attribute_pred = attribute_list[index] if len(attribute_list) > 0 else None

        velocity_list = preds[maps_dict.PRED_VELOCITY]
        velocity_pred = velocity_list[index] if len(velocity_list) > 0 else None

        self.postprocessor.forward(decoded_boxes, scores, preds,
                                   attribute_pred, velocity_pred)
def __init__(self, batch_size, is_training):
    """Create the builders, heads and layers of a two-stage detector.

    Everything is configured from the global ``cfg``.  Per-stage components
    are stored in two-element lists (position 0: first stage, position 1:
    second stage).

    NOTE(review): this definition repeats the constructor of
    ``DoubleStageDetector`` that follows it in the file -- confirm whether
    both copies are meant to exist.

    Args:
        batch_size: forwarded to ``PlaceHolders`` and every ``HeadBuilder``.
        is_training: forwarded to every ``HeadBuilder`` / ``LayerBuilder``.
    """
    self.batch_size = batch_size
    self.is_training = is_training
    self.only_first_stage = cfg.MODEL.ONLY_FIRST_STAGE

    # placeholders
    self.placeholders_builder = PlaceHolders(self.batch_size)
    self.placeholders_builder.get_placeholders()
    self.placeholders = self.placeholders_builder.placeholders

    # class name <-> index lookup, indices start at 1
    self.cls_list = cfg.DATASET.KITTI.CLS_LIST
    self.cls2idx = {name: pos for pos, name in enumerate(self.cls_list, 1)}
    self.idx2cls = {pos: name for pos, name in enumerate(self.cls_list, 1)}

    # anchors are only generated for the first stage
    self.anchor_builder = Anchors(0, self.cls_list)

    # one instance per stage
    self.encoder_decoder_list = [EncoderDecoder(0), EncoderDecoder(1)]
    self.postprocessor_list = [
        PostProcessor(0, 1),
        PostProcessor(1, len(self.cls_list)),
    ]
    self.loss_builder_list = [LossBuilder(0), LossBuilder(1)]
    self.target_assigner_list = [TargetAssigner(0), TargetAssigner(1)]

    # sampler
    self.sampler = Sampler(1)

    # points pooler
    rpn_net_cfg = cfg.MODEL.NETWORK.FIRST_STAGE
    pool_cfg = rpn_net_cfg.POINTS_POOLER
    self.pool_mask_thresh = rpn_net_cfg.POOLER_MASK_THRESHOLD
    self.points_pooler = PointsPooler(pool_cfg)

    # ---------------- RPN stage: heads ----------------
    self.rpn_iou_loss = False
    self.rpn_heads = []
    for head_entry in rpn_net_cfg.HEAD:
        rpn_head = HeadBuilder(self.batch_size,
                               self.anchor_builder.anchors_num, 0,
                               head_entry, is_training)
        self.rpn_heads.append(rpn_head)
        if rpn_head.layer_type == 'IoU':
            self.rpn_iou_loss = True

    # ---------------- RPN stage: network ----------------
    self.rpn_vote_loss = False
    self.rpn_layers = []
    rpn_arch = rpn_net_cfg.ARCHITECTURE
    for layer_idx, _ in enumerate(rpn_arch):
        rpn_layer = LayerBuilder(layer_idx, self.is_training, rpn_arch)
        self.rpn_layers.append(rpn_layer)
        if rpn_layer.layer_type == 'Vote_Layer':
            self.rpn_vote_loss = True

    # ---------------- RCNN stage: heads ----------------
    rcnn_net_cfg = cfg.MODEL.NETWORK.SECOND_STAGE
    self.rcnn_iou_loss = False
    self.rcnn_heads = []
    for head_entry in rcnn_net_cfg.HEAD:
        rcnn_head = HeadBuilder(self.batch_size, 1, 1, head_entry,
                                is_training)
        self.rcnn_heads.append(rcnn_head)
        if rcnn_head.layer_type == 'IoU':
            self.rcnn_iou_loss = True

    # ---------------- RCNN stage: network ----------------
    self.rcnn_vote_loss = False
    self.rcnn_layers = []
    rcnn_arch = rcnn_net_cfg.ARCHITECTURE
    for layer_idx, _ in enumerate(rcnn_arch):
        rcnn_layer = LayerBuilder(layer_idx, self.is_training, rcnn_arch)
        self.rcnn_layers.append(rcnn_layer)
        if rcnn_layer.layer_type == 'Vote_Layer':
            self.rcnn_vote_loss = True

    # per-stage views, indexed by stage
    self.heads = [self.rpn_heads, self.rcnn_heads]
    self.layers = [self.rpn_layers, self.rcnn_layers]

    # per-stage loss switches, indexed by stage
    self.corner_loss = [
        cfg.MODEL.FIRST_STAGE.CORNER_LOSS,
        cfg.MODEL.SECOND_STAGE.CORNER_LOSS,
    ]
    self.vote_loss = [self.rpn_vote_loss, self.rcnn_vote_loss]
    self.iou_loss = [self.rpn_iou_loss, self.rcnn_iou_loss]
    self.attr_velo_loss = [
        False,
        cfg.MODEL.SECOND_STAGE.PREDICT_ATTRIBUTE_AND_VELOCITY,
    ]

    self.__init_dict()
class DoubleStageDetector:
    """Two-stage detector: a first (RPN) stage followed by a second (RCNN) stage.

    Per-stage components live in two-element lists indexed by
    ``stage_index`` (0: first stage, 1: second stage).  Predictions are
    accumulated in ``self.output`` and assigned targets in ``self.labels``;
    both map ``maps_dict`` keys to lists that grow while the graph is built.
    """

    def __init__(self, batch_size, is_training):
        """Create the builders, heads and layers of both stages from ``cfg``.

        Args:
            batch_size: forwarded to ``PlaceHolders`` and every
                ``HeadBuilder``; also used to reshape first-stage proposals.
            is_training: forwarded to the head/layer builders and used to
                select the training branches in ``model_forward``.
        """
        self.batch_size = batch_size
        self.is_training = is_training
        self.only_first_stage = cfg.MODEL.ONLY_FIRST_STAGE

        # placeholders
        self.placeholders_builder = PlaceHolders(self.batch_size)
        self.placeholders_builder.get_placeholders()
        self.placeholders = self.placeholders_builder.placeholders

        # class name <-> index lookup, indices start at 1
        self.cls_list = cfg.DATASET.KITTI.CLS_LIST
        self.cls2idx = {name: pos for pos, name in enumerate(self.cls_list, 1)}
        self.idx2cls = {pos: name for pos, name in enumerate(self.cls_list, 1)}

        # anchors are only generated for the first stage
        self.anchor_builder = Anchors(0, self.cls_list)

        # one instance per stage
        self.encoder_decoder_list = [EncoderDecoder(0), EncoderDecoder(1)]
        self.postprocessor_list = [
            PostProcessor(0, 1),
            PostProcessor(1, len(self.cls_list)),
        ]
        self.loss_builder_list = [LossBuilder(0), LossBuilder(1)]
        self.target_assigner_list = [TargetAssigner(0), TargetAssigner(1)]

        # sampler
        self.sampler = Sampler(1)

        # points pooler
        rpn_net_cfg = cfg.MODEL.NETWORK.FIRST_STAGE
        pool_cfg = rpn_net_cfg.POINTS_POOLER
        self.pool_mask_thresh = rpn_net_cfg.POOLER_MASK_THRESHOLD
        self.points_pooler = PointsPooler(pool_cfg)

        # RPN head / network definition
        self.rpn_heads, self.rpn_iou_loss = self._build_heads(
            rpn_net_cfg.HEAD, self.batch_size,
            self.anchor_builder.anchors_num, 0, is_training)
        self.rpn_layers, self.rpn_vote_loss = self._build_layers(
            rpn_net_cfg.ARCHITECTURE, self.is_training)

        # RCNN-stage head / network definition
        rcnn_net_cfg = cfg.MODEL.NETWORK.SECOND_STAGE
        self.rcnn_heads, self.rcnn_iou_loss = self._build_heads(
            rcnn_net_cfg.HEAD, self.batch_size, 1, 1, is_training)
        self.rcnn_layers, self.rcnn_vote_loss = self._build_layers(
            rcnn_net_cfg.ARCHITECTURE, self.is_training)

        # per-stage views, indexed by stage_index
        self.heads = [self.rpn_heads, self.rcnn_heads]
        self.layers = [self.rpn_layers, self.rcnn_layers]

        # per-stage loss switches, indexed by stage_index
        self.corner_loss = [
            cfg.MODEL.FIRST_STAGE.CORNER_LOSS,
            cfg.MODEL.SECOND_STAGE.CORNER_LOSS,
        ]
        self.vote_loss = [self.rpn_vote_loss, self.rcnn_vote_loss]
        self.iou_loss = [self.rpn_iou_loss, self.rcnn_iou_loss]
        self.attr_velo_loss = [
            False,
            cfg.MODEL.SECOND_STAGE.PREDICT_ATTRIBUTE_AND_VELOCITY,
        ]

        self.__init_dict()

    @staticmethod
    def _build_heads(head_cfg, batch_size, anchors_num, stage_index,
                     is_training):
        """Create one ``HeadBuilder`` per entry of ``head_cfg``.

        Returns:
            ``(heads, has_iou)`` where ``has_iou`` is True when any head has
            ``layer_type == 'IoU'``.
        """
        heads = [
            HeadBuilder(batch_size, anchors_num, stage_index, entry,
                        is_training)
            for entry in head_cfg
        ]
        has_iou = any(head.layer_type == 'IoU' for head in heads)
        return heads, has_iou

    @staticmethod
    def _build_layers(layer_cfg, is_training):
        """Create one ``LayerBuilder`` per position of ``layer_cfg``.

        Returns:
            ``(layers, has_vote)`` where ``has_vote`` is True when any layer
            has ``layer_type == 'Vote_Layer'``.
        """
        layers = [
            LayerBuilder(position, is_training, layer_cfg)
            for position in range(len(layer_cfg))
        ]
        has_vote = any(layer.layer_type == 'Vote_Layer' for layer in layers)
        return layers, has_vote

    def __init_dict(self):
        """Create ``self.output`` / ``self.labels`` with an empty list per key.

        ``self.prediction_keys`` is the key view of ``self.output`` and is
        later handed to ``merge_head_prediction``.
        """
        output_keys = (
            # sampled xyz/feature
            maps_dict.KEY_OUTPUT_XYZ,
            maps_dict.KEY_OUTPUT_FEATURE,
            # generated anchors
            maps_dict.KEY_ANCHORS_3D,
            # vote output
            maps_dict.PRED_VOTE_OFFSET,
            maps_dict.PRED_VOTE_BASE,
            # det output
            maps_dict.PRED_CLS,
            maps_dict.PRED_OFFSET,
            maps_dict.PRED_ANGLE_CLS,
            maps_dict.PRED_ANGLE_RES,
            maps_dict.CORNER_LOSS_PRED_BOXES_CORNERS,
            maps_dict.PRED_ATTRIBUTE,
            maps_dict.PRED_VELOCITY,
            # iou output
            maps_dict.PRED_IOU_3D_VALUE,
            # final result
            maps_dict.PRED_3D_BBOX,
            maps_dict.PRED_3D_SCORE,
            maps_dict.PRED_3D_CLS_CATEGORY,
            maps_dict.PRED_3D_ATTRIBUTE,
            maps_dict.PRED_3D_VELOCITY,
        )
        self.output = {key: [] for key in output_keys}
        self.prediction_keys = self.output.keys()

        label_keys = (
            maps_dict.GT_CLS,
            maps_dict.GT_OFFSET,
            maps_dict.GT_ANGLE_CLS,
            maps_dict.GT_ANGLE_RES,
            maps_dict.GT_ATTRIBUTE,
            maps_dict.GT_VELOCITY,
            maps_dict.GT_BOXES_ANCHORS_3D,
            maps_dict.GT_IOU_3D_VALUE,
            maps_dict.GT_PMASK,
            maps_dict.GT_NMASK,
            maps_dict.CORNER_LOSS_GT_BOXES_CORNERS,
        )
        self.labels = {key: [] for key in label_keys}

    def network_forward(self, point_cloud, index, bn_decay, xyz_list,
                        feature_list, fps_idx_list):
        """Run the layers and heads of stage ``index`` on ``point_cloud``.

        The first three channels of ``point_cloud`` are appended to
        ``xyz_list`` and the remaining channels to ``feature_list`` (the
        caller's lists are extended in place) before the layers are built.
        Head predictions are written into ``self.output`` and merged with
        ``merge_head_prediction``.
        """
        # split the input into coordinates and per-point features
        xyz_part = tf.slice(point_cloud, [0, 0, 0], [-1, -1, 3])
        feature_part = tf.slice(point_cloud, [0, 0, 3], [-1, -1, -1])
        xyz_list.append(xyz_part)
        feature_list.append(feature_part)
        fps_idx_list.append(None)

        stage_layers = self.layers[index]
        stage_heads = self.heads[index]

        for stage_layer in stage_layers:
            xyz_list, feature_list, fps_idx_list = stage_layer.build_layer(
                xyz_list, feature_list, fps_idx_list, bn_decay, self.output)

        # remember where this stage's head outputs start before adding them
        head_start = len(self.output[maps_dict.KEY_OUTPUT_XYZ])
        for stage_head in stage_heads:
            stage_head.build_layer(xyz_list, feature_list, bn_decay,
                                   self.output)
        merge_head_prediction(head_start, self.output, self.prediction_keys)

    def model_forward(self, bn_decay=None):
        """Build the first stage and, unless disabled, the second stage.

        In training mode targets are assigned and the losses are built for
        each stage.  First-stage proposals are always decoded; when
        ``self.only_first_stage`` is set the method returns right after that.
        The second stage is decoded only when not training.
        """
        input_points = self.placeholders[maps_dict.PL_POINTS_INPUT]

        # ---- first stage ----
        rpn_xyz, rpn_features, rpn_fps_idx = [], [], []
        self.network_forward(input_points, 0, bn_decay, rpn_xyz,
                             rpn_features, rpn_fps_idx)

        # generate anchors, [bs, pts_num, 1/cls_num, 7]
        base_xyz = self.output[maps_dict.KEY_OUTPUT_XYZ][-1]
        anchors = self.anchor_builder.generate(base_xyz)
        self.output[maps_dict.KEY_ANCHORS_3D].append(anchors)

        if self.is_training:
            self.target_assign(-1, 0, base_xyz, anchors)
            self.train_forward(-1, 0, anchors)

        # decode proposals (done in both training and inference)
        self.test_forward(-1, 0, cfg.MODEL.FIRST_STAGE, anchors)
        if self.only_first_stage:
            return

        # ---- second stage ----
        # [bs, proposal_num, 7]
        proposals = tf.reshape(
            self.output[maps_dict.PRED_3D_BBOX][-1],
            [self.batch_size, cfg.MODEL.FIRST_STAGE.MAX_OUTPUT_NUM, 7])
        expand_proposals = tf.expand_dims(proposals, axis=2)
        ctr_proposals = cast_bottom_to_center(proposals)

        if self.is_training:
            # target_assign returns the proposals it kept; continue with those
            valid_mask = self.points_pooler.get_valid_mask(base_xyz,
                                                           proposals)
            expand_proposals = self.target_assign(
                -1, 1, ctr_proposals[:, :, :3], expand_proposals, valid_mask)
            proposals = tf.squeeze(expand_proposals, axis=2)
            ctr_proposals = cast_bottom_to_center(proposals)

        # [bs, proposal_num, 1, 7]
        self.output[maps_dict.KEY_ANCHORS_3D].append(expand_proposals)

        # pool: mark entries whose first-stage score reaches the threshold
        base_feature = self.output[maps_dict.KEY_OUTPUT_FEATURE][-1]
        first_stage_score = self.output[maps_dict.PRED_3D_SCORE][-1]
        score_ok = tf.greater_equal(first_stage_score, self.pool_mask_thresh)
        base_mask = tf.cast(score_ok, tf.float32)
        base_mask = tf.expand_dims(base_mask, axis=-1)  # [bs, proposal_num, 1]
        # [bs * proposal_num, sample_num, 3+c]
        pool_feature, pool_mask = self.points_pooler.pool(
            base_xyz, base_feature, base_mask, proposals, self.is_training,
            bn_decay)

        # the stage-2 lists start with the proposal centers
        rcnn_xyz = [ctr_proposals[:, :, :3]]
        rcnn_features = [None]
        rcnn_fps_idx = [None]
        self.network_forward(pool_feature, 1, bn_decay, rcnn_xyz,
                             rcnn_features, rcnn_fps_idx)

        if not self.is_training:
            self.test_forward(-1, 1, cfg.MODEL.SECOND_STAGE, expand_proposals,
                              valid_mask=pool_mask)
        else:
            self.train_forward(-1, 1, expand_proposals)

    def target_assign(self, index, stage_index, base_xyz, anchors,
                      valid_mask=None):
        """Assign ground-truth targets to every anchor/proposal.

        The assigned targets are appended to ``self.labels``.  When
        ``stage_index >= 1`` the anchors and all targets are first reduced
        with ``self.sampler.gather_list`` using the sum of the positive and
        negative masks.

        ``index`` is accepted but not used in this method.

        Returns:
            The anchors (the gathered ones when ``stage_index >= 1``).
        """
        stage_codec = self.encoder_decoder_list[stage_index]
        stage_assigner = self.target_assigner_list[stage_index]

        pl = self.placeholders
        pl_boxes_3d = pl[maps_dict.PL_LABEL_BOXES_3D]
        pl_classes = pl[maps_dict.PL_LABEL_CLASSES]
        pl_angle_cls = pl[maps_dict.PL_ANGLE_CLS]
        pl_angle_res = pl[maps_dict.PL_ANGLE_RESIDUAL]
        # attribute / velocity labels are optional placeholders
        pl_attributes = (pl[maps_dict.PL_LABEL_ATTRIBUTES]
                         if maps_dict.PL_LABEL_ATTRIBUTES in pl else None)
        pl_velocity = (pl[maps_dict.PL_LABEL_VELOCITY]
                       if maps_dict.PL_LABEL_VELOCITY in pl else None)

        assignment = stage_assigner.assign(
            base_xyz, anchors, pl_boxes_3d, pl_classes, pl_angle_cls,
            pl_angle_res, pl_velocity, pl_attributes, valid_mask)
        # the angle targets returned here are replaced by encode() below
        (_assigned_idx, pmask, nmask, tgt_boxes_3d, tgt_labels,
         _assigner_angle_cls, _assigner_angle_res, tgt_velocity,
         tgt_attribute) = assignment

        # encode offset
        tgt_offset, tgt_angle_cls, tgt_angle_res = stage_codec.encode(
            base_xyz, tgt_boxes_3d, anchors)

        if stage_index >= 1:
            # gather assigned proposal out for reducing memory cost
            keep_mask = pmask + nmask
            gathered = self.sampler.gather_list(keep_mask, [
                anchors, pmask, nmask, tgt_boxes_3d, tgt_labels, tgt_offset,
                tgt_angle_cls, tgt_angle_res, tgt_velocity, tgt_attribute
            ])
            (anchors, pmask, nmask, tgt_boxes_3d, tgt_labels, tgt_offset,
             tgt_angle_cls, tgt_angle_res, tgt_velocity,
             tgt_attribute) = gathered

        assigned_targets = (
            (maps_dict.GT_CLS, tgt_labels),
            (maps_dict.GT_BOXES_ANCHORS_3D, tgt_boxes_3d),
            (maps_dict.GT_OFFSET, tgt_offset),
            (maps_dict.GT_ANGLE_CLS, tgt_angle_cls),
            (maps_dict.GT_ANGLE_RES, tgt_angle_res),
            (maps_dict.GT_ATTRIBUTE, tgt_attribute),
            (maps_dict.GT_VELOCITY, tgt_velocity),
            (maps_dict.GT_PMASK, pmask),
            (maps_dict.GT_NMASK, nmask),
        )
        for label_key, target in assigned_targets:
            self.labels[label_key].append(target)

        return anchors

    def train_forward(self, index, stage_index, anchors, valid_mask=None):
        """Build the loss of stage ``stage_index``.

        Boxes are decoded using the one-hot *ground-truth* angle class
        together with the predicted offset and angle residual; their corners
        and the corners of the assigned ground-truth boxes are stored for the
        corner loss before ``LossBuilder.forward`` is called.

        ``valid_mask`` is accepted but not used in this method.
        """
        stage_loss_builder = self.loss_builder_list[stage_index]
        stage_codec = self.encoder_decoder_list[stage_index]

        base_xyz = self.output[maps_dict.KEY_OUTPUT_XYZ][index]
        pred_offset = self.output[maps_dict.PRED_OFFSET][index]
        pred_angle_cls = self.output[maps_dict.PRED_ANGLE_CLS][index]
        pred_angle_res = self.output[maps_dict.PRED_ANGLE_RES][index]

        # corner loss inputs
        gt_angle_cls = self.labels[maps_dict.GT_ANGLE_CLS][index]
        gt_boxes_3d = self.labels[maps_dict.GT_BOXES_ANCHORS_3D][index]

        # bs, pts_num, cls_num, -1
        angle_one_hot = tf.one_hot(gt_angle_cls,
                                   depth=cfg.MODEL.ANGLE_CLS_NUM,
                                   on_value=1,
                                   off_value=0,
                                   axis=-1)
        angle_one_hot = tf.cast(angle_one_hot, tf.float32)

        # [bs, points_num, cls_num, 7]
        decoded_boxes = stage_codec.decode(base_xyz, pred_offset,
                                           angle_one_hot, pred_angle_res,
                                           self.is_training, anchors)
        # [bs, points_num, cls_num, 8, 3]
        decoded_corners = transfer_box3d_to_corners(decoded_boxes)
        target_corners = transfer_box3d_to_corners(gt_boxes_3d)

        self.output[maps_dict.CORNER_LOSS_PRED_BOXES_CORNERS].append(
            decoded_corners)
        self.labels[maps_dict.CORNER_LOSS_GT_BOXES_CORNERS].append(
            target_corners)

        stage_loss_builder.forward(index, self.labels, self.output,
                                   self.placeholders,
                                   self.corner_loss[stage_index],
                                   self.vote_loss[stage_index],
                                   self.attr_velo_loss[stage_index],
                                   self.iou_loss[stage_index])

    def test_forward(self, index, stage_index, stage_cfg, anchors,
                     valid_mask=None):
        """Decode the predictions of stage ``stage_index`` and post-process.

        Scores come from a softmax (first channel dropped) when
        ``stage_cfg.CLS_ACTIVATION == 'Softmax'`` and from a sigmoid
        otherwise.  They are multiplied by the predicted IoU when the stage
        has an IoU head and by ``valid_mask`` when one is given.
        """
        stage_codec = self.encoder_decoder_list[stage_index]
        stage_postprocessor = self.postprocessor_list[stage_index]

        base_xyz = self.output[maps_dict.KEY_OUTPUT_XYZ][index]
        # [bs, points_num, cls_num + 1/0]
        pred_cls = self.output[maps_dict.PRED_CLS][index]
        pred_offset = self.output[maps_dict.PRED_OFFSET][index]
        pred_angle_cls = self.output[maps_dict.PRED_ANGLE_CLS][index]
        pred_angle_res = self.output[maps_dict.PRED_ANGLE_RES][index]

        # decode predictions, [bs, points_num, cls_num, 7]
        decoded_boxes = stage_codec.decode(base_xyz, pred_offset,
                                           pred_angle_cls, pred_angle_res,
                                           self.is_training, anchors)

        # decode classification
        if stage_cfg.CLS_ACTIVATION != 'Softmax':
            # sigmoid
            pred_score = tf.nn.sigmoid(pred_cls)
        else:
            # softmax, then drop channel 0 of the last axis
            pred_score = tf.nn.softmax(pred_cls)
            pred_score = tf.slice(pred_score, [0, 0, 1], [-1, -1, -1])

        # using IoU branch proposed by sparse-to-dense
        if self.iou_loss[stage_index]:
            pred_iou = self.output[maps_dict.PRED_IOU_3D_VALUE][index]
            pred_score = pred_score * pred_iou

        if valid_mask is not None:
            valid_mask = tf.cast(valid_mask, tf.float32)
            pred_score = pred_score * valid_mask

        # attribute / velocity predictions only exist when a head wrote them
        attribute_preds = self.output[maps_dict.PRED_ATTRIBUTE]
        pred_attribute = (attribute_preds[index]
                          if len(attribute_preds) > 0 else None)

        velocity_preds = self.output[maps_dict.PRED_VELOCITY]
        pred_velocity = (velocity_preds[index]
                         if len(velocity_preds) > 0 else None)

        stage_postprocessor.forward(decoded_boxes, pred_score, self.output,
                                    pred_attribute, pred_velocity)