Example #1
def train(model_config, train_config, dataset_config):

    dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                 use_defaults=False)

    train_val_test = 'train'
    model_name = model_config.model_name

    with tf.Graph().as_default():
        if model_name == 'rpn_model':
            model = RpnModel(model_config,
                             train_val_test=train_val_test,
                             dataset=dataset)
        elif model_name == 'mlod_model':
            model = MlodModel(model_config,
                              train_val_test=train_val_test,
                              dataset=dataset)
        else:
            raise ValueError('Invalid model_name')

        trainer.train(model, train_config)
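A hedged usage sketch for this entry point: the config-loading helper and the pipeline path below are illustrative assumptions, not taken from the repository; only the call into train() mirrors Example #1.

import tensorflow as tf

def main(_):
    # Hypothetical loader standing in for whatever pipeline parser the
    # repo provides (assumed name and path, not verified).
    model_config, train_config, dataset_config = get_configs(
        'configs/mlod_example.config')
    train(model_config, train_config, dataset_config)

if __name__ == '__main__':
    tf.app.run()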
Example #2
    def test_create_path_drop_masks(self):
        # Tests creating path drop choices
        # based on the given probabilities

        rpn_model = RpnModel(self.model_config,
                             train_val_test="val",
                             dataset=self.dataset)
        rpn_model.build()
        ##################################
        # Test-Case 1 : Keep img, Keep bev
        ##################################
        p_img = tf.constant(0.6)
        p_bev = tf.constant(0.85)

        # Set the random numbers for testing purposes
        rand_choice = [0.53, 0.83, 0.05]
        rand_choice_tensor = tf.convert_to_tensor(rand_choice)

        img_mask, bev_mask = rpn_model.create_path_drop_masks(
            p_img, p_bev, rand_choice_tensor)

        with self.test_session():
            img_mask_out = img_mask.eval()
            bev_mask_out = bev_mask.eval()
            np.testing.assert_array_equal(img_mask_out, 1.0)
            np.testing.assert_array_equal(bev_mask_out, 1.0)

        ##################################
        # Test-Case 2 : Kill img, Keep bev
        ##################################
        p_img = tf.constant(0.2)
        p_bev = tf.constant(0.85)

        img_mask, bev_mask = rpn_model.create_path_drop_masks(
            p_img, p_bev, rand_choice_tensor)

        with self.test_session():
            img_mask_out = img_mask.eval()
            bev_mask_out = bev_mask.eval()
            np.testing.assert_array_equal(img_mask_out, 0.0)
            np.testing.assert_array_equal(bev_mask_out, 1.0)

        ##################################
        # Test-Case 3 : Keep img, Kill bev
        ##################################
        p_img = tf.constant(0.9)
        p_bev = tf.constant(0.1)

        img_mask, bev_mask = rpn_model.create_path_drop_masks(
            p_img, p_bev, rand_choice_tensor)

        with self.test_session():
            img_mask_out = img_mask.eval()
            bev_mask_out = bev_mask.eval()
            np.testing.assert_array_equal(img_mask_out, 1.0)
            np.testing.assert_array_equal(bev_mask_out, 0.0)

        ##############################################
        # Test-Case 4 : Kill img, Kill bev, third flip
        ##############################################
        p_img = tf.constant(0.0)
        p_bev = tf.constant(0.1)

        img_mask, bev_mask = rpn_model.create_path_drop_masks(
            p_img, p_bev, rand_choice_tensor)

        with self.test_session():
            img_mask_out = img_mask.eval()
            bev_mask_out = bev_mask.eval()
            np.testing.assert_array_equal(img_mask_out, 0.0)
            # Because of the third condition, we expect to be keeping bev
            np.testing.assert_array_equal(bev_mask_out, 1.0)

        ##############################################
        # Test-Case 5 : Kill img, Kill bev, third flip
        ##############################################
        # Let's flip the third chance and keep img instead
        rand_choice = [0.53, 0.83, 0.61]
        rand_choice_tensor = tf.convert_to_tensor(rand_choice)
        p_img = tf.constant(0.0)
        p_bev = tf.constant(0.1)

        img_mask, bev_mask = rpn_model.create_path_drop_masks(
            p_img, p_bev, rand_choice_tensor)

        with self.test_session():
            img_mask_out = img_mask.eval()
            bev_mask_out = bev_mask.eval()
            # Because of the third condition, we expect to be keeping img
            np.testing.assert_array_equal(img_mask_out, 1.0)
            np.testing.assert_array_equal(bev_mask_out, 0.0)
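The five cases above pin down the decision rule. The plain-Python sketch below reconstructs it from the expected outputs; it is inferred from this test, not copied from RpnModel.create_path_drop_masks.

def path_drop_choices(p_img, p_bev, rand):
    # rand[0] vs p_img gates the image path, rand[1] vs p_bev gates the
    # BEV path, and rand[2] breaks the tie when both paths would be
    # dropped (< 0.5 keeps BEV, otherwise keeps the image path).
    img_mask = 1.0 if rand[0] < p_img else 0.0
    bev_mask = 1.0 if rand[1] < p_bev else 0.0
    if img_mask == 0.0 and bev_mask == 0.0:
        if rand[2] < 0.5:
            bev_mask = 1.0
        else:
            img_mask = 1.0
    return img_mask, bev_mask

# Matches Test-Cases 1-5 above:
assert path_drop_choices(0.6, 0.85, [0.53, 0.83, 0.05]) == (1.0, 1.0)
assert path_drop_choices(0.2, 0.85, [0.53, 0.83, 0.05]) == (0.0, 1.0)
assert path_drop_choices(0.9, 0.10, [0.53, 0.83, 0.05]) == (1.0, 0.0)
assert path_drop_choices(0.0, 0.10, [0.53, 0.83, 0.05]) == (0.0, 1.0)
assert path_drop_choices(0.0, 0.10, [0.53, 0.83, 0.61]) == (1.0, 0.0)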
Example #3
    def test_path_drop_input_multiplication(self):
        # Tests the result of final image/bev inputs
        # based on the path drop decisions

        rpn_model = RpnModel(self.model_config,
                             train_val_test="val",
                             dataset=self.dataset)
        rpn_model.build()
        # Shape of input feature map
        dummy_img_feature_shape = [1, 30, 50, 2]
        random_values = np.random.randint(low=1,
                                          high=256,
                                          size=2).astype(np.float32)

        dummy_img_feature_map = tf.fill(dummy_img_feature_shape,
                                        random_values[0])
        # Assume both feature maps are the same size; this is not
        # the case inside the network
        dummy_bev_feature_map = tf.fill(dummy_img_feature_shape,
                                        random_values[1])

        ##################################
        # Test-Case 1 : Keep img, Kill bev
        ##################################
        exp_img_input = np.full(dummy_img_feature_shape, random_values[0])
        exp_bev_input = np.full(dummy_img_feature_shape, 0.0)

        p_img = tf.constant(0.6)
        p_bev = tf.constant(0.4)

        # Set the random numbers for testing purposes
        rand_choice = [0.53, 0.83, 0.05]
        rand_choice_tensor = tf.convert_to_tensor(rand_choice)

        img_mask, bev_mask = rpn_model.create_path_drop_masks(
            p_img, p_bev, rand_choice_tensor)

        final_img_input = tf.multiply(dummy_img_feature_map,
                                      img_mask)

        final_bev_input = tf.multiply(dummy_bev_feature_map,
                                      bev_mask)

        with self.test_session():
            final_img_input_out = final_img_input.eval()
            final_bev_input_out = final_bev_input.eval()
            np.testing.assert_array_equal(final_img_input_out,
                                          exp_img_input)
            np.testing.assert_array_equal(final_bev_input_out,
                                          exp_bev_input)

        ##################################
        # Test-Case 2 : Kill img, Keep bev
        ##################################
        exp_img_input = np.full(dummy_img_feature_shape, 0.0)
        exp_bev_input = np.full(dummy_img_feature_shape, random_values[1])

        p_img = tf.constant(0.4)
        p_bev = tf.constant(0.9)

        img_mask, bev_mask = rpn_model.create_path_drop_masks(
            p_img, p_bev, rand_choice_tensor)

        final_img_input = tf.multiply(dummy_img_feature_map,
                                      img_mask)

        final_bev_input = tf.multiply(dummy_bev_feature_map,
                                      bev_mask)

        with self.test_session():
            final_img_input_out = final_img_input.eval()
            final_bev_input_out = final_bev_input.eval()
            np.testing.assert_array_equal(final_img_input_out,
                                          exp_img_input)
            np.testing.assert_array_equal(final_bev_input_out,
                                          exp_bev_input)
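The multiplication under test is plain broadcast gating; a self-contained NumPy equivalent with the same dummy shapes (the fill values here are arbitrary stand-ins):

import numpy as np

feature_shape = (1, 30, 50, 2)
img_features = np.full(feature_shape, 7.0, dtype=np.float32)
bev_features = np.full(feature_shape, 3.0, dtype=np.float32)

# Test-Case 1 equivalent: keep img, kill bev
img_mask, bev_mask = 1.0, 0.0
final_img = img_features * img_mask  # passes through unchanged
final_bev = bev_features * bev_mask  # zeroed out

assert (final_img == img_features).all()
assert (final_bev == 0.0).all()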
Example #4
    def __init__(self, model_config, train_val_test, dataset):
        """
        Args:
            model_config: configuration for the model
            train_val_test: "train", "val", or "test"
            dataset: the dataset that will provide samples and ground truth
        """

        # Sets model configs (_config)
        super(MlodModel, self).__init__(model_config)

        self.dataset = dataset

        # Dataset config
        self._num_final_classes = self.dataset.num_classes + 1

        # Input config
        input_config = self._config.input_config
        self._bev_pixel_size = np.asarray(
            [input_config.bev_dims_h, input_config.bev_dims_w])
        self._bev_depth = input_config.bev_depth

        self._img_pixel_size = np.asarray(
            [input_config.img_dims_h, input_config.img_dims_w])
        self._img_depth = [input_config.img_depth]

        # MLOD config
        mlod_config = self._config.mlod_config
        self._proposal_roi_crop_size = \
            [mlod_config.mlod_proposal_roi_crop_size] * 2
        self._positive_selection = mlod_config.mlod_positive_selection
        self._nms_size = mlod_config.mlod_nms_size
        self._nms_iou_threshold = mlod_config.mlod_nms_iou_thresh
        self._path_drop_probabilities = self._config.path_drop_probabilities
        self._box_rep = mlod_config.mlod_box_representation
        self.apply_occ_mask = mlod_config.apply_occ_mask
        self.occ_quantile_level = mlod_config.occ_quantile_level

        if self._box_rep not in [
                'box_3d', 'box_8c', 'box_8co', 'box_4c', 'box_4ca'
        ]:
            raise ValueError('Invalid box representation', self._box_rep)

        # Create the RpnModel
        self._rpn_model = RpnModel(model_config, train_val_test, dataset)

        self.lidar_only = self._rpn_model.lidar_only
        self.num_views = self._rpn_model.num_views
        self.offsets_ratio = mlod_config.offsets_ratio

        # Create the occlusion masking model
        self._n_split = 6
        self._occ_mask_layer = OccMaskLayer()

        if train_val_test not in ["train", "val", "test"]:
            raise ValueError('Invalid train_val_test value, '
                             'should be one of ["train", "val", "test"]')
        self._train_val_test = train_val_test
        self._is_training = (self._train_val_test == 'train')

        self.sample_info = {}

        self.img_idx = self.model_config.image_channel_indices
        self.multi_scale_image = len(self.img_idx) > 1
        self.feature_names = self.model_config.image_layer_extract
        # NOTE: the config value above is immediately overridden by this
        # hard-coded list
        self.feature_names = [
            'vgg_16/pyramid_fusion3', 'vgg_16/pyramid_fusion1'
        ]
        self.off_out_size = mlod_fc_layer_utils.OFFSETS_OUTPUT_SIZE[
            self._box_rep]
Example #5
class MlodModel(model.DetectionModel):
    ##############################
    # Keys for Predictions
    ##############################
    # Mini batch (mb) ground truth
    PRED_MB_CLASSIFICATIONS_GT = 'mlod_mb_classifications_gt'
    PRED_MB_IMG_CLASSIFICATIONS_GT = 'mlod_gt_mb_classifications_gt'
    PRED_MB_OFFSETS_GT = 'mlod_mb_offsets_gt'
    PRED_MB_OFFSETS_RATIO_GT = 'mlod_mb_offsets_ratio_gt'
    PRED_MB_OFFSETS_2D_GT = 'mlod_mb_2d_offsets'
    PRED_MB_OFFSETS_RATIO_2D_GT = 'mlod_mb_2d_offsets_ratio'
    PRED_MB_ORIENTATIONS_GT = 'mlod_mb_orientations_gt'

    # Mini batch (mb) predictions
    PRED_MB_CLASSIFICATION_LOGITS = 'mlod_mb_classification_logits'
    PRED_SUB_MB_CLASSIFICATION_LOGITS_LIST = 'mlod_sub_mb_classification_logits_list'
    PRED_MB_CLASSIFICATION_SOFTMAX = 'mlod_mb_classification_softmax'
    PRED_MB_SUB_CLASSIFICATION_SOFTMAX_LIST = 'mlod_mb_sub_classification_softmax_list'
    PRED_MB_OFFSETS = 'mlod_mb_offsets'
    PRED_MB_SUB_OFFSETS_LIST = 'mlod_mb_offsets_list'
    PRED_MB_OFFSETS_RATIO = 'mlod_mb_offsets_ratio'
    PRED_MB_SUB_OFFSETS_RATIO_LIST = 'mlod_mb_offsets_ratio_list'
    PRED_MB_ANGLE_VECTORS = 'mlod_mb_angle_vectors'

    # Top predictions after BEV NMS
    PRED_TOP_CLASSIFICATION_LOGITS = 'mlod_top_classification_logits'
    PRED_TOP_CLASSIFICATION_SOFTMAX = 'mlod_top_classification_softmax'
    PRED_TOP_SUB_CLASSIFICATION_SOFTMAX_LIST = 'mlod_top_sub_classification_softmax_list'

    PRED_TOP_PREDICTION_ANCHORS = 'mlod_top_prediction_anchors'
    PRED_TOP_PREDICTION_BOXES_3D = 'mlod_top_prediction_boxes_3d'
    PRED_TOP_ORIENTATIONS = 'mlod_top_orientations'

    # Other box representations
    PRED_TOP_BOXES_8C = 'mlod_top_regressed_boxes_8c'
    PRED_TOP_BOXES_4C = 'mlod_top_prediction_boxes_4c'

    # Mini batch (mb) predictions (for debugging)
    PRED_MB_MASK = 'mlod_mb_mask'
    PRED_MB_POS_MASK = 'mlod_mb_pos_mask'
    PRED_MB_ANCHORS_GT = 'mlod_mb_anchors_gt'
    PRED_MB_CLASS_INDICES_GT = 'mlod_mb_gt_classes'

    # All predictions (for debugging)
    PRED_ALL_CLASSIFICATIONS = 'mlod_classifications'
    PRED_ALL_OFFSETS = 'mlod_offsets'
    PRED_ALL_ANGLE_VECTORS = 'mlod_angle_vectors'

    PRED_MAX_IOUS = 'mlod_max_ious'
    PRED_ALL_IOUS = 'mlod_anchor_ious'

    ##############################
    # Keys for Loss
    ##############################
    LOSS_FINAL_CLASSIFICATION = 'mlod_classification_loss'
    LOSS_FINAL_REGRESSION = 'mlod_regression_loss'

    # (for debugging)
    LOSS_FINAL_ORIENTATION = 'mlod_orientation_loss'
    LOSS_FINAL_LOCALIZATION = 'mlod_localization_loss'

    def __init__(self, model_config, train_val_test, dataset):
        """
        Args:
            model_config: configuration for the model
            train_val_test: "train", "val", or "test"
            dataset: the dataset that will provide samples and ground truth
        """

        # Sets model configs (_config)
        super(MlodModel, self).__init__(model_config)

        self.dataset = dataset

        # Dataset config
        self._num_final_classes = self.dataset.num_classes + 1

        # Input config
        input_config = self._config.input_config
        self._bev_pixel_size = np.asarray(
            [input_config.bev_dims_h, input_config.bev_dims_w])
        self._bev_depth = input_config.bev_depth

        self._img_pixel_size = np.asarray(
            [input_config.img_dims_h, input_config.img_dims_w])
        self._img_depth = [input_config.img_depth]

        # MLOD config
        mlod_config = self._config.mlod_config
        self._proposal_roi_crop_size = \
            [mlod_config.mlod_proposal_roi_crop_size] * 2
        self._positive_selection = mlod_config.mlod_positive_selection
        self._nms_size = mlod_config.mlod_nms_size
        self._nms_iou_threshold = mlod_config.mlod_nms_iou_thresh
        self._path_drop_probabilities = self._config.path_drop_probabilities
        self._box_rep = mlod_config.mlod_box_representation
        self.apply_occ_mask = mlod_config.apply_occ_mask
        self.occ_quantile_level = mlod_config.occ_quantile_level

        if self._box_rep not in [
                'box_3d', 'box_8c', 'box_8co', 'box_4c', 'box_4ca'
        ]:
            raise ValueError('Invalid box representation', self._box_rep)

        # Create the RpnModel
        self._rpn_model = RpnModel(model_config, train_val_test, dataset)

        self.lidar_only = self._rpn_model.lidar_only
        self.num_views = self._rpn_model.num_views
        self.offsets_ratio = mlod_config.offsets_ratio

        # Create the occlusion masking model
        self._n_split = 6
        self._occ_mask_layer = OccMaskLayer()

        if train_val_test not in ["train", "val", "test"]:
            raise ValueError('Invalid train_val_test value, '
                             'should be one of ["train", "val", "test"]')
        self._train_val_test = train_val_test
        self._is_training = (self._train_val_test == 'train')

        self.sample_info = {}

        self.img_idx = self.model_config.image_channel_indices
        self.multi_scale_image = len(self.img_idx) > 1
        self.feature_names = self.model_config.image_layer_extract
        # NOTE: the config value above is immediately overridden by this
        # hard-coded list
        self.feature_names = [
            'vgg_16/pyramid_fusion3', 'vgg_16/pyramid_fusion1'
        ]
        self.off_out_size = mlod_fc_layer_utils.OFFSETS_OUTPUT_SIZE[
            self._box_rep]

    def build(self):
        rpn_model = self._rpn_model

        # Share the same prediction dict as RPN
        prediction_dict = rpn_model.build()

        top_anchors = prediction_dict[RpnModel.PRED_TOP_ANCHORS]
        ground_plane = rpn_model.placeholders[RpnModel.PL_GROUND_PLANE]

        class_labels = rpn_model.placeholders[RpnModel.PL_LABEL_CLASSES]

        depth_map = prediction_dict[RpnModel.PRED_DEPTH_MAP]

        with tf.variable_scope('mlod_projection'):

            if self._config.expand_proposals_xz > 0.0:

                expand_length = self._config.expand_proposals_xz

                # Expand anchors along x and z
                with tf.variable_scope('expand_xz'):
                    expanded_dim_x = top_anchors[:, 3] + expand_length
                    expanded_dim_z = top_anchors[:, 5] + expand_length

                    expanded_anchors = tf.stack([
                        top_anchors[:, 0], top_anchors[:, 1],
                        top_anchors[:, 2], expanded_dim_x,
                        top_anchors[:, 4], expanded_dim_z
                    ], axis=1)

                mlod_projection_in = expanded_anchors

            else:
                mlod_projection_in = top_anchors

            with tf.variable_scope('bev'):
                # Project top anchors into bev and image spaces
                bev_proposal_boxes, bev_proposal_boxes_norm = \
                    anchor_projector.project_to_bev(
                        mlod_projection_in,
                        self.dataset.kitti_utils.bev_extents)

                # Reorder projected boxes into [y1, x1, y2, x2]
                bev_proposal_boxes_tf_order = \
                    anchor_projector.reorder_projected_boxes(
                        bev_proposal_boxes)
                bev_proposal_boxes_norm_tf_order = \
                    anchor_projector.reorder_projected_boxes(
                        bev_proposal_boxes_norm)

            with tf.variable_scope('img'):
                if self.lidar_only:
                    image_shape = tf.cast(
                        tf.shape(rpn_model.placeholders[RpnModel.PL_IMG_INPUT])
                        [1:3], tf.float32)
                    img_proposal_boxes_norm_tf_order = []
                    for i in range(self.num_views):
                        img_proposal_boxes, img_proposal_boxes_norm = \
                            anchor_projector.tf_project_to_image_space(
                                mlod_projection_in,
                                rpn_model.placeholders[
                                    RpnModel.PL_CALIB_P2][i],
                                image_shape)
                        # Only reorder the normalized img
                        img_proposal_boxes_norm_tf_order.append(
                            anchor_projector.reorder_projected_boxes(
                                img_proposal_boxes_norm))
                else:
                    image_shape = tf.cast(
                        tf.shape(rpn_model.placeholders[RpnModel.PL_IMG_INPUT])
                        [0:2], tf.float32)
                    img_proposal_boxes, img_proposal_boxes_norm = \
                        anchor_projector.tf_project_to_image_space(
                            mlod_projection_in,
                            rpn_model.placeholders[RpnModel.PL_CALIB_P2],
                            image_shape)
                    img_proposal_boxes_tf_order = \
                        anchor_projector.reorder_projected_boxes(
                            img_proposal_boxes)
                    img_proposal_boxes_norm_tf_order = \
                        anchor_projector.reorder_projected_boxes(
                            img_proposal_boxes_norm)

            bev_feature_maps = rpn_model.bev_feature_maps
            img_feature_maps = rpn_model.img_feature_maps

        if not (self._path_drop_probabilities[0] ==
                self._path_drop_probabilities[1] == 1.0) \
           and (self.lidar_only and self.num_views > 0):

            with tf.variable_scope('mlod_path_drop'):

                img_mask = rpn_model.img_path_drop_mask
                bev_mask = rpn_model.bev_path_drop_mask
                if self.lidar_only:
                    for i in range(self.num_views):
                        img_feature_maps[i] = tf.multiply(
                            img_feature_maps[i], img_mask)
                else:
                    img_feature_maps = tf.multiply(img_feature_maps, img_mask)

                bev_feature_maps = tf.multiply(bev_feature_maps, bev_mask)
        else:
            bev_mask = tf.constant(1.0)
            img_mask = tf.constant(1.0)

        # ROI Pooling
        with tf.variable_scope('mlod_roi_pooling'):

            def get_box_indices(boxes):
                proposals_shape = boxes.get_shape().as_list()
                if any(dim is None for dim in proposals_shape):
                    proposals_shape = tf.shape(boxes)
                ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
                multiplier = tf.expand_dims(
                    tf.range(start=0, limit=proposals_shape[0]), 1)
                return tf.reshape(ones_mat * multiplier, [-1])

            bev_boxes_norm_batches = tf.expand_dims(bev_proposal_boxes_norm,
                                                    axis=0)

            # These should be all 0's since there is only 1 image
            tf_box_indices = get_box_indices(bev_boxes_norm_batches)

            # Do ROI Pooling on BEV
            bev_rois = tf.image.crop_and_resize(
                bev_feature_maps,
                bev_proposal_boxes_norm_tf_order,
                tf_box_indices,
                self._proposal_roi_crop_size,
                name='bev_rois')
            # Do ROI Pooling on image
            if self.lidar_only:
                img_rois = []
                for i in range(self.num_views):
                    img_rois.append(
                        tf.image.crop_and_resize(
                            img_feature_maps[i],
                            img_proposal_boxes_norm_tf_order[i],
                            tf_box_indices,
                            self._proposal_roi_crop_size,
                            name='img_rois'))
            else:
                img_rois_list = []

                # Hoisted so the two_features check further below never
                # hits an unbound name when multi_scale_image is True.
                two_features = False

                if self.multi_scale_image:
                    img_end_points = rpn_model.img_end_points
                    img_feature_maps_list = [
                        img_end_points[tensor_name]
                        for tensor_name in self.feature_names
                    ]
                else:
                    if two_features:
                        img_end_points = rpn_model.img_end_points
                        img_feature_maps_list = [
                            img_end_points[tensor_name]
                            for tensor_name in self.feature_names
                        ]
                    else:
                        img_feature_maps_list = [img_feature_maps]

                for img_feature_maps in img_feature_maps_list:
                    img_rois_list.append(
                        tf.image.crop_and_resize(
                            img_feature_maps,
                            img_proposal_boxes_norm_tf_order,
                            tf_box_indices,
                            self._proposal_roi_crop_size,
                            name='img_rois'))

        # Occlusion masking
        if self.apply_occ_mask:
            ref_depth_min = (mlod_projection_in[:, 2]
                             - mlod_projection_in[:, 5] / 2 - 0.5)
            ref_depth_max = (mlod_projection_in[:, 2]
                             + mlod_projection_in[:, 5] / 2 + 0.5)
            # No background masking:
            # ref_depth_max = tf.ones_like(ref_depth_min) * 100

            occ_mask = self._occ_mask_layer.build(
                depth_map, img_proposal_boxes_norm_tf_order, tf_box_indices,
                ref_depth_min, ref_depth_max, self._n_split, [8, 8],
                self._proposal_roi_crop_size, self.occ_quantile_level)

            img_rois_masked_list = [
                tf.multiply(img_rois, occ_mask, name='masked_img')
                for img_rois in img_rois_list
            ]
        else:
            img_rois_masked_list = img_rois_list

        # Get anchors dimension
        boxes_3d_x_dim = tf.abs(bev_proposal_boxes[:, 0] -
                                bev_proposal_boxes[:, 2])
        boxes_3d_z_dim = tf.abs(bev_proposal_boxes[:, 1] -
                                bev_proposal_boxes[:, 3])
        boxes_3d_dim = tf.stack([
            boxes_3d_x_dim, boxes_3d_x_dim, boxes_3d_x_dim, boxes_3d_x_dim,
            boxes_3d_z_dim, boxes_3d_z_dim, boxes_3d_z_dim, boxes_3d_z_dim,
            tf.ones_like(boxes_3d_z_dim),
            tf.ones_like(boxes_3d_z_dim)
        ], axis=1)

        boxes_2d_x_dim = tf.abs(img_proposal_boxes[:, 0] -
                                img_proposal_boxes[:, 2])
        boxes_2d_y_dim = tf.abs(img_proposal_boxes[:, 1] -
                                img_proposal_boxes[:, 3])
        boxes_2d_dim = tf.stack([
            boxes_2d_x_dim, boxes_2d_y_dim,
            tf.ones_like(boxes_2d_x_dim),
            tf.ones_like(boxes_2d_y_dim)
        ], axis=1)

        # Fully connected layers (Box Predictor)
        mlod_layers_config = self.model_config.layers_config.mlod_config
        cls_input_weights = [
            mlod_layers_config.cls_input_weights[0] * bev_mask,
            mlod_layers_config.cls_input_weights[1] * img_mask
        ]
        reg_input_weights = [
            mlod_layers_config.reg_input_weights[0] * bev_mask,
            mlod_layers_config.reg_input_weights[1] * img_mask
        ]

        multi_check = rpn_model.multi_check
        cls_reg_separated = rpn_model.cls_reg_separated
        reg_var = self._config.reg_var

        if cls_reg_separated:
            fusion_cls_out = ['cls']
            fusion_reg_out = ['offset', 'ang']
        else:
            fusion_net_out = ['cls', 'offset', 'ang']

        if self.lidar_only:
            # The per-view image ROIs are used directly on this path
            # (img_rois_masked_list is only built on the camera path above).
            img_rois_masked = list(img_rois)
            img_rois_masked.append(bev_rois)
            mlod_mask = [img_mask] * self.num_views + [bev_mask]
            fc_output_layers = \
                mlod_fc_layers_builder.build(
                    layers_config=mlod_layers_config,
                    input_rois=img_rois_masked,
                    input_weights=mlod_mask,
                    num_final_classes=self._num_final_classes,
                    box_rep=self._box_rep,
                    top_anchors=top_anchors,
                    ground_plane=ground_plane,
                    is_training=self._is_training,
                    cls_reg_separated=cls_reg_separated)
        else:
            if two_features:
                rois_masked_list_cls = [img_rois_masked_list[0]]
                rois_masked_list_reg = [img_rois_masked_list[1]]
                rois_masked_list_cls.insert(0, bev_rois)
                rois_masked_list_reg.insert(0, bev_rois)
            else:
                rois_masked_list = img_rois_masked_list
                rois_masked_list.insert(0, bev_rois)
                rois_masked_list_cls = rois_masked_list
                rois_masked_list_reg = rois_masked_list
            if not cls_reg_separated:
                fc_output_layers = \
                    mlod_fc_layers_builder.build(
                        layers_config=mlod_layers_config,
                        input_rois=rois_masked_list,
                        cls_input_weights=cls_input_weights,
                        reg_input_weights=reg_input_weights,
                        num_final_classes=self._num_final_classes,
                        box_rep=self._box_rep,
                        top_anchors=top_anchors,
                        ground_plane=ground_plane,
                        is_training=self._is_training,
                        variables_name='box_classifier_regressor',
                        multi_check=multi_check,
                        net_out=fusion_net_out,
                        img_idx=self.img_idx)

                all_cls_logits = \
                    fc_output_layers[mlod_fc_layers_builder.KEY_CLS_LOGITS]
                all_offsets = fc_output_layers[
                    mlod_fc_layers_builder.KEY_OFFSETS]
                all_angle_vectors = \
                    fc_output_layers.get(mlod_fc_layers_builder.KEY_ANGLE_VECTORS)
                if multi_check:
                    sub_cls_logits_list = \
                        fc_output_layers[mlod_fc_layers_builder.KEY_SUB_CLS_LOGITS_LIST]
                    sub_reg_offset_list = fc_output_layers[
                        mlod_fc_layers_builder.KEY_SUB_REG_OFFSETS_LIST]
                else:
                    sub_cls_logits_list = []
                    sub_reg_offset_list = []

            else:

                fc_output_layers1 = \
                    mlod_fc_layers_builder.build(
                        layers_config=mlod_layers_config,
                        input_rois=rois_masked_list_cls,
                        cls_input_weights=cls_input_weights,
                        reg_input_weights=reg_input_weights,
                        num_final_classes=self._num_final_classes,
                        box_rep=self._box_rep,
                        top_anchors=top_anchors,
                        ground_plane=ground_plane,
                        is_training=self._is_training,
                        variables_name='box_classifier',
                        multi_check=multi_check,
                        net_out=fusion_cls_out,
                        img_idx=self.img_idx)

                all_cls_logits = \
                    fc_output_layers1[mlod_fc_layers_builder.KEY_CLS_LOGITS]
                sub_cls_logits_list = \
                    fc_output_layers1[mlod_fc_layers_builder.KEY_SUB_CLS_LOGITS_LIST]
                selected_img_idx = fc_output_layers1[
                    mlod_fc_layers_builder.KEY_SELECTED_IMG_IDX]

                fc_output_layers2 = \
                    mlod_fc_layers_builder.build(
                        layers_config=mlod_layers_config,
                        input_rois=rois_masked_list_reg,
                        cls_input_weights=cls_input_weights,
                        reg_input_weights=reg_input_weights,
                        num_final_classes=self._num_final_classes,
                        box_rep=self._box_rep,
                        top_anchors=top_anchors,
                        ground_plane=ground_plane,
                        is_training=self._is_training,
                        variables_name='box_regressor',
                        multi_check=multi_check,
                        net_out=fusion_reg_out,
                        img_idx=self.img_idx,
                        selected_img_idx_in=selected_img_idx)

                if self.offsets_ratio:
                    all_offsets_ratio = fc_output_layers2[
                        mlod_fc_layers_builder.KEY_OFFSETS]
                    sub_reg_offset_ratio_list = fc_output_layers2[
                        mlod_fc_layers_builder.KEY_SUB_REG_OFFSETS_LIST]

                    all_offsets = all_offsets_ratio * boxes_3d_dim * reg_var
                    sub_reg_offset_list = []
                    for branch_idx, sub_reg_offset_ratio in enumerate(
                            sub_reg_offset_ratio_list):
                        if branch_idx in self.img_idx:
                            sub_reg_offset = sub_reg_offset_ratio * boxes_2d_dim * reg_var
                        else:
                            sub_reg_offset = sub_reg_offset_ratio * boxes_3d_dim * reg_var
                        sub_reg_offset_list.append(sub_reg_offset)
                else:
                    all_offsets_multi_classes = fc_output_layers2[
                        mlod_fc_layers_builder.KEY_OFFSETS]
                    sub_reg_offset_multi_classes_list = fc_output_layers2[
                        mlod_fc_layers_builder.KEY_SUB_REG_OFFSETS_LIST]

                all_angle_vectors = \
                    fc_output_layers2.get(mlod_fc_layers_builder.KEY_ANGLE_VECTORS)

        # Select the regression output matching the predicted class
        # (this assumes the class-wise offsets produced by the separated
        # classifier/regressor heads above).
        all_offsets = self.class_selection(all_offsets_multi_classes,
                                           all_cls_logits, self.off_out_size)
        sub_reg_offset_list = []
        for branch_idx, sub_offsets_mc in enumerate(
                sub_reg_offset_multi_classes_list):
            sub_logits = sub_cls_logits_list[branch_idx]
            if branch_idx in self.img_idx:
                sub_offsets = self.class_selection(sub_offsets_mc, sub_logits,
                                                   4)
            else:
                sub_offsets = self.class_selection(sub_offsets_mc, sub_logits,
                                                   self.off_out_size)
            sub_reg_offset_list.append(sub_offsets)

        sub_cls_softmax_list = []
        with tf.variable_scope('softmax'):
            all_cls_softmax = tf.nn.softmax(all_cls_logits)

            for sub_logits in sub_cls_logits_list:
                sub_cls_softmax = tf.nn.softmax(sub_logits)
                sub_cls_softmax_list.append(sub_cls_softmax)

        ######################################################
        # Subsample mini_batch for the loss function
        ######################################################
        # Get the ground truth tensors
        anchors_gt = rpn_model.placeholders[RpnModel.PL_LABEL_ANCHORS]
        if self._box_rep in ['box_3d', 'box_4ca']:
            boxes_3d_gt = rpn_model.placeholders[RpnModel.PL_LABEL_BOXES_3D]
            orientations_gt = boxes_3d_gt[:, 6]
        elif self._box_rep in ['box_8c', 'box_8co', 'box_4c']:
            boxes_3d_gt = rpn_model.placeholders[RpnModel.PL_LABEL_BOXES_3D]
        else:
            raise NotImplementedError('Ground truth tensors not implemented')
        boxes_2d_gt = rpn_model.placeholders[RpnModel.PL_LABEL_BOXES_2D]

        # Project anchor_gts to 2D bev
        with tf.variable_scope('mlod_gt_projection'):
            # TODO: (#140) fix kitti_util
            bev_anchor_boxes_gt, _ = anchor_projector.project_to_bev(
                anchors_gt, self.dataset.kitti_utils.bev_extents)

            bev_anchor_boxes_gt_tf_order = \
                anchor_projector.reorder_projected_boxes(bev_anchor_boxes_gt)

            img_anchor_boxes_gt_tf_order = \
                anchor_projector.reorder_projected_boxes(boxes_2d_gt)

        with tf.variable_scope('mlod_box_list'):
            # BEV: convert to box_list format
            anchor_box_list_gt = box_list.BoxList(bev_anchor_boxes_gt_tf_order)
            anchor_box_list = box_list.BoxList(bev_proposal_boxes_tf_order)

            # Image
            img_box_list_gt = box_list.BoxList(img_anchor_boxes_gt_tf_order)
            img_box_list = box_list.BoxList(img_proposal_boxes_tf_order)

        mb_mask, mb_bev_class_label_indices, mb_img_class_label_indices, \
            mb_gt_indices, mb_img_gt_indices = \
            self.sample_mini_batch(
                anchor_box_list_gt=anchor_box_list_gt,
                anchor_box_list=anchor_box_list,
                img_box_list_gt=img_box_list_gt,
                img_box_list=img_box_list,
                class_labels=class_labels)

        # Create classification one_hot vector
        with tf.variable_scope('mlod_one_hot_classes'):
            mb_classification_gt = tf.one_hot(
                mb_bev_class_label_indices,
                depth=self._num_final_classes,
                on_value=1.0 - self._config.label_smoothing_epsilon,
                off_value=(self._config.label_smoothing_epsilon /
                           self.dataset.num_classes))

        with tf.variable_scope('mlod_img_one_hot_classes'):
            mb_img_classification_gt = tf.one_hot(
                mb_img_class_label_indices,
                depth=self._num_final_classes,
                on_value=1.0 - self._config.label_smoothing_epsilon,
                off_value=(self._config.label_smoothing_epsilon /
                           self.dataset.num_classes))

        # Mask predictions
        with tf.variable_scope('mlod_apply_mb_mask'):
            # Classification
            mb_classifications_logits = tf.boolean_mask(
                all_cls_logits, mb_mask)
            mb_classifications_softmax = tf.boolean_mask(
                all_cls_softmax, mb_mask)
            sub_mb_classifications_logits_list = []
            for br_idx, sub_cls_logits in enumerate(sub_cls_logits_list):
                sub_mb_classifications_logits = tf.boolean_mask(
                    sub_cls_logits, mb_mask)
                sub_mb_classifications_logits_list.append(
                    sub_mb_classifications_logits)
            mb_sub_classifications_softmax_list = []
            for br_idx, sub_cls_softmax in enumerate(sub_cls_softmax_list):
                mb_sub_classifications_softmax = tf.boolean_mask(
                    sub_cls_softmax, mb_mask)
                mb_sub_classifications_softmax_list.append(
                    mb_sub_classifications_softmax)

            # Offsets
            mb_offsets = tf.boolean_mask(all_offsets, mb_mask)

            mb_sub_offsets_list = []
            for sub_offset in sub_reg_offset_list:
                mb_sub_offsets = tf.boolean_mask(sub_offset, mb_mask)
                mb_sub_offsets_list.append(mb_sub_offsets)

            # Angle Vectors
            if all_angle_vectors is not None:
                mb_angle_vectors = tf.boolean_mask(all_angle_vectors, mb_mask)
            else:
                mb_angle_vectors = None

        # Encode anchor offsets
        with tf.variable_scope('mlod_encode_mb_anchors'):
            mb_anchors = tf.boolean_mask(top_anchors, mb_mask)

            if self._box_rep == 'box_3d':
                # Gather corresponding ground truth anchors for each mb sample
                mb_anchors_gt = tf.gather(anchors_gt, mb_gt_indices)
                mb_offsets_gt = anchor_encoder.tf_anchor_to_offset(
                    mb_anchors, mb_anchors_gt)

                # Gather corresponding ground truth orientation for each
                # mb sample
                mb_orientations_gt = tf.gather(orientations_gt, mb_gt_indices)
            elif self._box_rep in ['box_8c', 'box_8co']:

                # Get boxes_3d ground truth mini-batch and convert to box_8c
                mb_boxes_3d_gt = tf.gather(boxes_3d_gt, mb_gt_indices)
                if self._box_rep == 'box_8c':
                    mb_boxes_8c_gt = \
                        box_8c_encoder.tf_box_3d_to_box_8c(mb_boxes_3d_gt)
                elif self._box_rep == 'box_8co':
                    mb_boxes_8c_gt = \
                        box_8c_encoder.tf_box_3d_to_box_8co(mb_boxes_3d_gt)

                # Convert proposals: anchors -> box_3d -> box8c
                proposal_boxes_3d = \
                    box_3d_encoder.anchors_to_box_3d(top_anchors, fix_lw=True)
                proposal_boxes_8c = \
                    box_8c_encoder.tf_box_3d_to_box_8c(proposal_boxes_3d)

                # Get mini batch offsets
                mb_boxes_8c = tf.boolean_mask(proposal_boxes_8c, mb_mask)
                mb_offsets_gt = box_8c_encoder.tf_box_8c_to_offsets(
                    mb_boxes_8c, mb_boxes_8c_gt)

                # Flatten the offsets to a (N x 24) vector
                mb_offsets_gt = tf.reshape(mb_offsets_gt, [-1, 24])

            elif self._box_rep in ['box_4c', 'box_4ca']:

                # Get ground plane for box_4c conversion
                ground_plane = self._rpn_model.placeholders[
                    self._rpn_model.PL_GROUND_PLANE]

                # Convert gt boxes_3d -> box_4c
                mb_boxes_3d_gt = tf.gather(boxes_3d_gt, mb_gt_indices)
                mb_boxes_4c_gt = box_4c_encoder.tf_box_3d_to_box_4c(
                    mb_boxes_3d_gt, ground_plane)

                # Convert proposals: anchors -> box_3d -> box_4c
                proposal_boxes_3d = \
                    box_3d_encoder.anchors_to_box_3d(top_anchors, fix_lw=True)
                proposal_boxes_4c = \
                    box_4c_encoder.tf_box_3d_to_box_4c(proposal_boxes_3d,
                                                       ground_plane)

                # Get mini batch
                mb_boxes_4c = tf.boolean_mask(proposal_boxes_4c, mb_mask)
                mb_offsets_gt = box_4c_encoder.tf_box_4c_to_offsets(
                    mb_boxes_4c, mb_boxes_4c_gt)

                if self._box_rep == 'box_4ca':
                    # Gather corresponding ground truth orientation for each
                    # mb sample
                    mb_orientations_gt = tf.gather(orientations_gt,
                                                   mb_gt_indices)

            else:
                raise NotImplementedError(
                    'Anchor encoding not implemented for', self._box_rep)

            # 2D bounding box offsets, computed from the anchors'
            # projected 2D boxes
            mb_boxes_2d = tf.boolean_mask(img_proposal_boxes, mb_mask)
            mb_boxes_2d_gt = tf.gather(boxes_2d_gt, mb_img_gt_indices)
            mb_offsets_2d_gt = anchor_encoder.tf_2d_box_to_offset(
                mb_boxes_2d, mb_boxes_2d_gt)

        ######################################################
        # ROI summary images
        ######################################################
        mlod_mini_batch_size = \
            self.dataset.kitti_utils.mini_batch_utils.mlod_mini_batch_size
        with tf.variable_scope('bev_mlod_rois'):
            mb_bev_anchors_norm = tf.boolean_mask(
                bev_proposal_boxes_norm_tf_order, mb_mask)
            mb_bev_box_indices = tf.zeros_like(mb_gt_indices, dtype=tf.int32)

            # Show the ROIs of the BEV input density map
            # for the mini batch anchors
            bev_input_rois = tf.image.crop_and_resize(
                self._rpn_model._bev_preprocessed, mb_bev_anchors_norm,
                mb_bev_box_indices, (32, 32))

            bev_input_roi_summary_images = tf.split(bev_input_rois,
                                                    self._bev_depth,
                                                    axis=3)
            tf.summary.image('bev_mlod_rois',
                             bev_input_roi_summary_images[-1],
                             max_outputs=mlod_mini_batch_size)

        with tf.variable_scope('img_mlod_rois'):
            if self.lidar_only:
                for i in range(self.num_views):
                    # ROIs on image input
                    mb_img_anchors_norm = tf.boolean_mask(
                        img_proposal_boxes_norm_tf_order[i], mb_mask)
                    mb_img_box_indices = tf.zeros_like(mb_gt_indices,
                                                       dtype=tf.int32)

                    # Do test ROI pooling on mini batch
                    img_input_rois = tf.image.crop_and_resize(
                        tf.expand_dims(self._rpn_model._img_preprocessed[i],
                                       axis=0), mb_img_anchors_norm,
                        mb_img_box_indices, (32, 32))

                    tf.summary.image('img_mlod_rois',
                                     img_input_rois,
                                     max_outputs=mlod_mini_batch_size)
            else:
                # ROIs on image input
                mb_img_anchors_norm = tf.boolean_mask(
                    img_proposal_boxes_norm_tf_order, mb_mask)
                mb_img_box_indices = tf.zeros_like(mb_gt_indices,
                                                   dtype=tf.int32)

                # Do test ROI pooling on mini batch
                img_input_rois = tf.image.crop_and_resize(
                    self._rpn_model._img_preprocessed, mb_img_anchors_norm,
                    mb_img_box_indices, (32, 32))

                tf.summary.image('img_mlod_rois',
                                 img_input_rois,
                                 max_outputs=mlod_mini_batch_size)

        ######################################################
        # Final Predictions
        ######################################################
        # Get orientations from angle vectors
        if all_angle_vectors is not None:
            with tf.variable_scope('mlod_orientation'):
                all_orientations = \
                    orientation_encoder.tf_angle_vector_to_orientation(
                        all_angle_vectors)

        # Apply offsets to regress proposals
        with tf.variable_scope('mlod_regression'):
            if self._box_rep == 'box_3d':
                prediction_anchors = \
                    anchor_encoder.offset_to_anchor(top_anchors,
                                                    all_offsets)

            elif self._box_rep in ['box_8c', 'box_8co']:
                # Reshape the 24-dim regressed offsets to (N x 3 x 8)
                reshaped_offsets = tf.reshape(all_offsets, [-1, 3, 8])
                # Given the offsets, get the boxes_8c
                prediction_boxes_8c = \
                    box_8c_encoder.tf_offsets_to_box_8c(proposal_boxes_8c,
                                                        reshaped_offsets)
                # Convert corners back to box3D
                prediction_boxes_3d = \
                    box_8c_encoder.box_8c_to_box_3d(prediction_boxes_8c)

                # Convert the box_3d to anchor format for nms
                prediction_anchors = \
                    box_3d_encoder.tf_box_3d_to_anchor(prediction_boxes_3d)

            elif self._box_rep in ['box_4c', 'box_4ca']:
                # Convert predictions box_4c -> box_3d
                prediction_boxes_4c = \
                    box_4c_encoder.tf_offsets_to_box_4c(proposal_boxes_4c,
                                                        all_offsets)

                prediction_boxes_3d = \
                    box_4c_encoder.tf_box_4c_to_box_3d(prediction_boxes_4c,
                                                       ground_plane)

                # Convert to anchor format for nms
                prediction_anchors = \
                    box_3d_encoder.tf_box_3d_to_anchor(prediction_boxes_3d)

            else:
                raise NotImplementedError('Regression not implemented for',
                                          self._box_rep)

        # Apply Non-oriented NMS in BEV
        with tf.variable_scope('mlod_nms'):
            bev_extents = self.dataset.kitti_utils.bev_extents

            with tf.variable_scope('bev_projection'):
                # Project predictions into BEV
                mlod_bev_boxes, _ = anchor_projector.project_to_bev(
                    prediction_anchors, bev_extents)
                mlod_bev_boxes_tf_order = \
                    anchor_projector.reorder_projected_boxes(
                        mlod_bev_boxes)

            # Get top score from second column onward
            all_top_scores = tf.reduce_max(all_cls_logits[:, 1:], axis=1)

            # Apply NMS in BEV
            nms_indices = tf.image.non_max_suppression(
                mlod_bev_boxes_tf_order,
                all_top_scores,
                max_output_size=self._nms_size,
                iou_threshold=self._nms_iou_threshold)

            # Gather predictions from NMS indices
            top_classification_logits = tf.gather(all_cls_logits, nms_indices)
            top_classification_softmax = tf.gather(all_cls_softmax,
                                                   nms_indices)
            top_sub_classification_softmax_list = []
            for sub_cls_softmax in sub_cls_softmax_list:
                top_sub_classification_softmax_list.append(
                    tf.gather(sub_cls_softmax, nms_indices))
            top_2d_proposal = tf.gather(img_proposal_boxes_norm, nms_indices)

            top_prediction_anchors = tf.gather(prediction_anchors, nms_indices)

            if self._box_rep == 'box_3d':
                top_orientations = tf.gather(all_orientations, nms_indices)

            elif self._box_rep in ['box_8c', 'box_8co']:
                top_prediction_boxes_3d = tf.gather(prediction_boxes_3d,
                                                    nms_indices)
                top_prediction_boxes_8c = tf.gather(prediction_boxes_8c,
                                                    nms_indices)

            elif self._box_rep == 'box_4c':
                top_prediction_boxes_3d = tf.gather(prediction_boxes_3d,
                                                    nms_indices)
                top_prediction_boxes_4c = tf.gather(prediction_boxes_4c,
                                                    nms_indices)

            elif self._box_rep == 'box_4ca':
                top_prediction_boxes_3d = tf.gather(prediction_boxes_3d,
                                                    nms_indices)
                top_prediction_boxes_4c = tf.gather(prediction_boxes_4c,
                                                    nms_indices)
                top_orientations = tf.gather(all_orientations, nms_indices)

            else:
                raise NotImplementedError('NMS gather not implemented for',
                                          self._box_rep)

        if self._train_val_test in ['train', 'val']:
            # Additional entries are added to the shared prediction_dict
            # Mini batch predictions
            prediction_dict['cls_softmax'] = all_cls_softmax
            prediction_dict[self.PRED_SUB_MB_CLASSIFICATION_LOGITS_LIST] = \
                sub_mb_classifications_logits_list
            prediction_dict[self.PRED_MB_CLASSIFICATION_LOGITS] = \
                mb_classifications_logits
            prediction_dict[self.PRED_MB_CLASSIFICATION_SOFTMAX] = \
                mb_classifications_softmax
            prediction_dict[self.PRED_MB_SUB_CLASSIFICATION_SOFTMAX_LIST] = \
                mb_sub_classifications_softmax_list
            prediction_dict[self.PRED_MB_OFFSETS] = mb_offsets
            prediction_dict[
                self.PRED_MB_SUB_OFFSETS_LIST] = mb_sub_offsets_list

            prediction_dict['top_3d_proposal'] = top_2d_proposal

            prediction_dict['2d_gt_box_rescale'] = img_anchor_boxes_gt_tf_order
            prediction_dict['3d_gt_box_rescale'] = bev_anchor_boxes_gt_tf_order
            prediction_dict['2d_proposed_box'] = img_proposal_boxes_tf_order
            prediction_dict['3d_proposed_box'] = bev_proposal_boxes_tf_order

            # Mini batch ground truth
            prediction_dict[self.PRED_MB_CLASSIFICATIONS_GT] = \
                mb_classification_gt
            prediction_dict[self.PRED_MB_IMG_CLASSIFICATIONS_GT] = \
                mb_img_classification_gt

            prediction_dict[self.PRED_MB_OFFSETS_GT] = mb_offsets_gt
            prediction_dict[self.PRED_MB_OFFSETS_2D_GT] = mb_offsets_2d_gt

            # Top NMS predictions
            prediction_dict[self.PRED_TOP_CLASSIFICATION_LOGITS] = \
                top_classification_logits
            prediction_dict[self.PRED_TOP_CLASSIFICATION_SOFTMAX] = \
                top_classification_softmax
            prediction_dict[self.PRED_TOP_SUB_CLASSIFICATION_SOFTMAX_LIST] = \
                top_sub_classification_softmax_list

            prediction_dict[self.PRED_TOP_PREDICTION_ANCHORS] = \
                top_prediction_anchors

            # Mini batch predictions (for debugging)
            prediction_dict[self.PRED_MB_MASK] = mb_mask
            prediction_dict[self.PRED_MB_CLASS_INDICES_GT] = \
                mb_bev_class_label_indices

            prediction_dict['gt_indices'] = mb_gt_indices
            prediction_dict['img_gt_indices'] = mb_img_gt_indices
            prediction_dict[
                'img_gt_label_indices'] = mb_img_class_label_indices

            # All predictions (for debugging)
            prediction_dict[self.PRED_ALL_CLASSIFICATIONS] = \
                all_cls_logits
            prediction_dict[self.PRED_ALL_OFFSETS] = all_offsets

            # Path drop masks (for debugging)
            prediction_dict['bev_mask'] = bev_mask
            prediction_dict['img_mask'] = img_mask
            prediction_dict[
                'BEV_freeze_mask'] = self._rpn_model.bev_freeze_mask

        else:
            # self._train_val_test == 'test'
            prediction_dict[self.PRED_TOP_CLASSIFICATION_SOFTMAX] = \
                top_classification_softmax
            prediction_dict[self.PRED_TOP_SUB_CLASSIFICATION_SOFTMAX_LIST] = \
                top_sub_classification_softmax_list
            prediction_dict[self.PRED_TOP_PREDICTION_ANCHORS] = \
                top_prediction_anchors
            prediction_dict[self.PRED_ALL_OFFSETS] = all_offsets

        if self._box_rep == 'box_3d':
            prediction_dict[self.PRED_MB_ANCHORS_GT] = mb_anchors_gt
            prediction_dict[self.PRED_MB_ORIENTATIONS_GT] = mb_orientations_gt
            prediction_dict[self.PRED_MB_ANGLE_VECTORS] = mb_angle_vectors

            prediction_dict[self.PRED_TOP_ORIENTATIONS] = top_orientations

            # For debugging
            prediction_dict[self.PRED_ALL_ANGLE_VECTORS] = all_angle_vectors

        elif self._box_rep in ['box_8c', 'box_8co']:
            prediction_dict[self.PRED_TOP_PREDICTION_BOXES_3D] = \
                top_prediction_boxes_3d

            # Store the corners before converting for visualization purposes
            prediction_dict[self.PRED_TOP_BOXES_8C] = top_prediction_boxes_8c

        elif self._box_rep == 'box_4c':
            prediction_dict[self.PRED_TOP_PREDICTION_BOXES_3D] = \
                top_prediction_boxes_3d
            prediction_dict[self.PRED_TOP_BOXES_4C] = top_prediction_boxes_4c

        elif self._box_rep == 'box_4ca':
            if self._train_val_test in ['train', 'val']:
                prediction_dict[self.PRED_MB_ORIENTATIONS_GT] = \
                    mb_orientations_gt
                prediction_dict[self.PRED_MB_ANGLE_VECTORS] = mb_angle_vectors

            prediction_dict[self.PRED_TOP_PREDICTION_BOXES_3D] = \
                top_prediction_boxes_3d
            prediction_dict[self.PRED_TOP_BOXES_4C] = top_prediction_boxes_4c
            prediction_dict[self.PRED_TOP_ORIENTATIONS] = top_orientations

        else:
            raise NotImplementedError('Prediction dict not implemented for',
                                      self._box_rep)

        return prediction_dict

    def sample_mini_batch(self, anchor_box_list_gt, anchor_box_list,
                          img_box_list_gt, img_box_list, class_labels):

        with tf.variable_scope('mlod_create_mb_mask'):
            # Get IoU for every anchor
            all_bev_ious = box_list_ops.iou(anchor_box_list_gt,
                                            anchor_box_list)
            max_bev_ious = tf.reduce_max(all_bev_ious, axis=0)
            max_bev_iou_indices = tf.argmax(all_bev_ious, axis=0)

            all_img_ious = box_list_ops.iou(img_box_list_gt, img_box_list)
            max_img_ious = tf.reduce_max(all_img_ious, axis=0)
            max_img_iou_indices = tf.argmax(all_img_ious, axis=0)

            # Sample a pos/neg mini-batch from anchors with highest IoU match
            mini_batch_utils = self.dataset.kitti_utils.mini_batch_utils
            # BEV mask
            mb_bev_mask, mb_pos_bev_mask = \
                mini_batch_utils.sample_mlod_mini_batch(max_bev_ious)

            # Image mask, sampled jointly with the BEV mask
            mb_mask, mb_pos_img_mask = \
                mini_batch_utils.double_sample_img_mini_batch(
                    max_img_ious, mb_bev_mask)

            mb_bev_class_label_indices = \
                mini_batch_utils.mask_class_label_indices(
                    mb_pos_bev_mask, mb_mask, max_bev_iou_indices,
                    class_labels)
            mb_img_class_label_indices = \
                mini_batch_utils.mask_class_label_indices(
                    mb_pos_img_mask, mb_mask, max_img_iou_indices,
                    class_labels)

            mb_bev_gt_indices = tf.boolean_mask(max_bev_iou_indices, mb_mask)
            mb_img_gt_indices = tf.boolean_mask(max_img_iou_indices, mb_mask)

        return (mb_mask, mb_bev_class_label_indices,
                mb_img_class_label_indices, mb_bev_gt_indices,
                mb_img_gt_indices)

    def create_feed_dict(self):
        feed_dict = self._rpn_model.create_feed_dict()
        self.sample_info = self._rpn_model.sample_info
        return feed_dict

    def loss(self, prediction_dict):
        # Note: The loss should be using mini-batch values only
        loss_dict, rpn_loss = self._rpn_model.loss(prediction_dict)
        losses_output = mlod_loss_builder.build(self, prediction_dict)

        classification_loss = \
            losses_output[mlod_loss_builder.KEY_CLASSIFICATION_LOSS]

        final_reg_loss = losses_output[mlod_loss_builder.KEY_REGRESSION_LOSS]

        mlod_loss = losses_output[mlod_loss_builder.KEY_MLOD_LOSS]

        offset_loss_norm = \
            losses_output[mlod_loss_builder.KEY_OFFSET_LOSS_NORM]

        loss_dict.update({self.LOSS_FINAL_CLASSIFICATION: classification_loss})
        loss_dict.update({self.LOSS_FINAL_REGRESSION: final_reg_loss})

        # Add localization and orientation losses to loss dict for plotting
        loss_dict.update({self.LOSS_FINAL_LOCALIZATION: offset_loss_norm})

        ang_loss_norm = losses_output.get(
            mlod_loss_builder.KEY_ANG_LOSS_NORM)
        if ang_loss_norm is not None:
            loss_dict.update({self.LOSS_FINAL_ORIENTATION: ang_loss_norm})

        with tf.variable_scope('model_total_loss'):
            total_loss = rpn_loss + mlod_loss

        return loss_dict, total_loss

    def class_selection(self, offsets, cls_logits, off_out_size):
        off_out_all = tf.reshape(offsets,
                                 [-1, self._num_final_classes, off_out_size])
        # Select the regression output by class
        cls_idx = tf.argmax(cls_logits, axis=1)
        cls_idx = tf.expand_dims(cls_idx, axis=-1)
        batch_size = tf.cast(tf.shape(offsets)[0], dtype=tf.int64)
        range_tf = tf.range(batch_size, dtype=tf.int64)
        range_tf = tf.expand_dims(range_tf, axis=1)
        selected_idx = tf.concat([range_tf, cls_idx], axis=1)

        off_out = tf.gather_nd(off_out_all, selected_idx)

        return off_out

    def resnet(self, input_layer):
        # Simplified residual block: a 3x3 conv followed by two
        # BN-ReLU-conv stages, joined by a skip connection
        layer_size = 128
        start_layer = slim.conv2d(
            input_layer,
            layer_size, [3, 3],
            normalizer_fn=slim.batch_norm,
            normalizer_params={'is_training': self._is_training})
        layer = start_layer
        for _ in range(2):
            bn_features = slim.batch_norm(layer,
                                          is_training=self._is_training)
            relu_feature = tf.nn.relu(bn_features)
            layer = slim.conv2d(relu_feature,
                                layer_size, [3, 3],
                                activation_fn=None)
        return layer + start_layer
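
The gather_nd indexing in class_selection above is the least obvious piece: it pairs each row index with that row's argmax class to pull out exactly one offset vector per anchor. Below is a minimal standalone sketch of the same trick, assuming TensorFlow 1.x; the shapes and values are illustrative only and not part of the repo.

import numpy as np
import tensorflow as tf

num_classes = 3
off_out_size = 6

# Flattened per-class offsets for a toy batch of 4 anchors
offsets = tf.constant(
    np.random.rand(4, num_classes * off_out_size), dtype=tf.float32)
cls_logits = tf.constant(np.random.rand(4, num_classes), dtype=tf.float32)

off_out_all = tf.reshape(offsets, [-1, num_classes, off_out_size])

# Pair each row index with its argmax class, then gather one
# offset vector per anchor
cls_idx = tf.expand_dims(tf.argmax(cls_logits, axis=1), axis=-1)
batch_size = tf.cast(tf.shape(offsets)[0], dtype=tf.int64)
row_idx = tf.expand_dims(tf.range(batch_size, dtype=tf.int64), axis=1)
selected = tf.gather_nd(off_out_all, tf.concat([row_idx, cls_idx], axis=1))

with tf.Session() as sess:
    print(sess.run(selected).shape)  # (4, 6)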
Example #6
def train_and_eval(model_config, train_config, eval_config, dataset_config):

    # Dataset Configuration
    dataset_config_train = DatasetBuilder.copy_config(dataset_config)
    dataset_config_eval = DatasetBuilder.copy_config(dataset_config)

    dataset_train = DatasetBuilder.build_kitti_dataset(dataset_config_train,
                                                       use_defaults=False)
    dataset_eval = DatasetBuilder.build_kitti_dataset(dataset_config_eval,
                                                      use_defaults=False)

    model_name = model_config.model_name
    train_val_test = 'train'
    eval_mode = eval_config.eval_mode
    if eval_mode == 'train':
        raise ValueError('Evaluation mode can only be set to `val` or `test`.')

    # keep a copy as this will be overwritten inside
    # the training loop below
    max_train_iter = train_config.max_iterations
    checkpoint_interval = train_config.checkpoint_interval
    eval_interval = eval_config.eval_interval

    if eval_interval < checkpoint_interval or \
            (eval_interval % checkpoint_interval) != 0:
        raise ValueError(
            'Evaluation interval (given {}) must be greater than or equal '
            'to and divisible by the checkpoint interval (given {}).'.format(
                eval_interval, checkpoint_interval))

    # Use the evaluation losses file to continue from the latest
    # checkpoint
    already_evaluated_ckpts = evaluator.get_evaluated_ckpts(
        model_config, model_name)

    if len(already_evaluated_ckpts) != 0:
        current_train_iter = already_evaluated_ckpts[-1]
    else:
        current_train_iter = eval_interval

    # while training is not finished
    while current_train_iter <= max_train_iter:
        # Train
        with tf.Graph().as_default():
            if model_name == 'mlod_model':
                model = MlodModel(model_config,
                                  train_val_test=train_val_test,
                                  dataset=dataset_train)
            elif model_name == 'rpn_model':
                model = RpnModel(model_config,
                                 train_val_test=train_val_test,
                                 dataset=dataset_train)
            else:
                raise ValueError('Invalid model name {}'.format(model_name))

            # overwrite the training iterations for this round
            train_config.max_iterations = current_train_iter
            print('\n*************** Training ****************\n')
            trainer.train(model, train_config)
        current_train_iter += eval_interval

        # Evaluate
        with tf.Graph().as_default():
            if model_name == 'mlod_model':
                model = MlodModel(model_config,
                                  train_val_test=eval_mode,
                                  dataset=dataset_eval)
            elif model_name == 'rpn_model':
                model = RpnModel(model_config,
                                 train_val_test=eval_mode,
                                 dataset=dataset_eval)
            else:
                raise ValueError('Invalid model name {}'.format(model_name))

            print('\n*************** Evaluating *****************\n')
            evaluator.run_latest_checkpoints(model, dataset_config_eval)
    print('\n************ Finished training and evaluating *************\n')
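
The interval guard at the top of train_and_eval deserves a second look: evaluation can only happen at a saved checkpoint, so the evaluation interval must be a positive multiple of the checkpoint interval. A minimal sketch of that guard as a standalone helper (the helper name is hypothetical, not from the repo):

def validate_intervals(eval_interval, checkpoint_interval):
    # Every evaluation step must land exactly on a saved checkpoint
    if eval_interval < checkpoint_interval or \
            eval_interval % checkpoint_interval != 0:
        raise ValueError(
            'Evaluation interval (given {}) must be greater than or equal '
            'to and divisible by the checkpoint interval (given {}).'.format(
                eval_interval, checkpoint_interval))

validate_intervals(eval_interval=1000, checkpoint_interval=500)   # passes
# validate_intervals(eval_interval=750, checkpoint_interval=500)  # raises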
Example #7
    def test_load_model_weights(self):
        # Tests stagewise training, i.e. loading trained RPN weights
        # into MLOD

        train_val_test = 'train'
        # overwrite the training iterations
        self.train_config.max_iterations = 1
        self.train_config.overwrite_checkpoints = True

        rpn_weights = []
        rpn_weights_reload = []

        with tf.Graph().as_default():
            model = RpnModel(self.model_config,
                             train_val_test=train_val_test,
                             dataset=self.dataset)
            trainer.train(model, self.train_config)

            paths_config = self.model_config.paths_config
            rpn_checkpoint_dir = paths_config.checkpoint_dir

            # load the weights back in
            init_op = tf.global_variables_initializer()

            saver = tf.train.Saver()
            with tf.Session() as sess:
                sess.run(init_op)

                trainer_utils.load_checkpoints(rpn_checkpoint_dir, saver)
                checkpoint_to_restore = saver.last_checkpoints[-1]

                trainer_utils.load_model_weights(model, sess,
                                                 checkpoint_to_restore)

                rpn_vars = slim.get_model_variables()
                rpn_weights = sess.run(rpn_vars)
                self.assertGreater(len(rpn_weights),
                                   0,
                                   msg='Loaded RPN weights are empty')

        with tf.Graph().as_default():
            model = MlodModel(self.model_config,
                              train_val_test=train_val_test,
                              dataset=self.dataset)
            model.build()

            # load the weights back in
            init_op = tf.global_variables_initializer()

            saver = tf.train.Saver()
            with tf.Session() as sess:
                sess.run(init_op)

                trainer_utils.load_checkpoints(rpn_checkpoint_dir, saver)
                checkpoint_to_restore = saver.last_checkpoints[-1]
                trainer_utils.load_model_weights(model, sess,
                                                 checkpoint_to_restore)

                mlod_vars = slim.get_model_variables()
                mlod_weights = sess.run(mlod_vars)
                # MLOD weights should include both RPN + MLOD weights
                self.assertGreater(len(mlod_weights),
                                   len(rpn_weights),
                                   msg='Expected more weights for MLOD')

                # grab weights corresponding to RPN by index
                # since the model variables are ordered
                rpn_len = len(rpn_weights)
                loaded_rpn_vars = mlod_vars[0:rpn_len]
                rpn_weights_reload = sess.run(loaded_rpn_vars)

                # Make sure the reloaded weights match the originally
                # loaded weights
                for i in range(rpn_len):
                    np.testing.assert_array_equal(rpn_weights_reload[i],
                                                  rpn_weights[i])
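
test_load_model_weights depends on trainer_utils.load_model_weights restoring the RPN subset of variables into the larger MLOD graph. One common way to implement such a partial restore in TF 1.x is to intersect the graph's variables with the names stored in the checkpoint; the helper below is a sketch of that idea under that assumption, not the repo's actual implementation.

import tensorflow as tf

def restore_shared_weights(sess, checkpoint_path):
    # Names of all variables stored in the checkpoint
    ckpt_var_names = set(
        name for name, _ in tf.train.list_variables(checkpoint_path))
    # Keep only the graph variables the checkpoint also contains
    shared_vars = [v for v in tf.global_variables()
                   if v.op.name in ckpt_var_names]
    tf.train.Saver(var_list=shared_vars).restore(sess, checkpoint_path)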