Example #1
    def call(self, inputs):
        print('    Detection Layer : call() ', type(inputs), len(inputs))    
        # logt('rpn_proposals_roi ',  inputs[0], verbose = self.verbose)
        # logt('mrcnn_class.shape ',  inputs[1], verbose = self.verbose) 
        # logt('mrcnn_bboxes.shape',  inputs[2], verbose = self.verbose)
        # logt('input_image_meta  ',  inputs[0], verbose = self.verbose) 
        logt('input_gt_class_ids',  inputs[0], verbose = self.verbose) 
        logt('input_gt_bboxes   ',  inputs[1], verbose = self.verbose)
    
        def wrapper(gt_class_ids, gt_bboxes):
        # def wrapper(rois, mrcnn_class, mrcnn_bbox, image_meta, gt_class_ids, gt_bboxes):
            from mrcnn.utils import parse_image_meta
            mod_detections_batch = []
            
            for b in range(self.config.BATCH_SIZE):
                                
                ##---------------------------------------------------------------------------------------------
                ## Run the regular detection graph, as we do in inference mode
                ## 24-01-2019 : In add_evaluation_detections_1 & 2 we do not need the inference detections, 
                ## So this has been commented out. 
                ##---------------------------------------------------------------------------------------------
                # _, _, window, _ =  parse_image_meta(image_meta)
                # detections      =  refine_detections(rois[b], mrcnn_class[b], mrcnn_bbox[b], window[b], self.config)
                
                ##---------------------------------------------------------------------------------------------
                ## Call routine to build the control file using GT annotations, adding false detections:
                ##---------------------------------------------------------------------------------------------
                # mod_detections  =  add_evaluation_detections_1(detections, image_meta[b], gt_class_ids[b], gt_bboxes[b], self.config)
                mod_detections, max_overlap  =  self.build_evaluation_detections( gt_class_ids[b], gt_bboxes[b], self.config, self.class_pred_stats)
                
                # if self.config.VERBOSE:
                    # print(' original detections (GT annotations) shape        :', gt_bboxes[b].shape)                
                    # print(' modified detections (after adding false positives):', mod_detections.shape)
                    # print(' Max Overlap: ', max_overlap)
                    # print(detections)
                    # pass
                    
                # Pad with zeros if detections < DETECTION_MAX_INSTANCES
                gap = self.config.DETECTION_MAX_INSTANCES - mod_detections.shape[0]
                assert gap >= 0
                if gap > 0:
                    mod_detections = np.pad(mod_detections, [(0, gap), (0, 0)], 'constant', constant_values=0)
                
                mod_detections_batch.append(mod_detections)

            # Stack detections and cast to float32
            # TODO: track where float64 is introduced
            mod_detections_batch = np.array(mod_detections_batch).astype(np.float32)
            num_columns = mod_detections_batch.shape[-1]

            # Reshape output
            # [batch, num_detections, (y1, x1, y2, x2, class_score, dt_ind)] in pixels
            return np.reshape(mod_detections_batch, [self.config.BATCH_SIZE, self.config.DETECTION_MAX_INSTANCES, num_columns])

        # Return wrapped function
        return tf.py_func(wrapper, inputs, tf.float32, name = 'detections')
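
The call() above pushes all NumPy work into `wrapper` and splices it into the TF graph with tf.py_func. A minimal self-contained sketch of that pattern (TF 1.x; `double_boxes` and the [None, 4] shape are made-up stand-ins for the real per-batch logic, not part of this repo):

import numpy as np
import tensorflow as tf

def double_boxes(boxes):
    # plain NumPy runs here at session-run time; the dtype must match Tout below
    return (boxes * 2.0).astype(np.float32)

boxes_in  = tf.placeholder(tf.float32, shape=[None, 4])
boxes_out = tf.py_func(double_boxes, [boxes_in], tf.float32, name='double_boxes')
# py_func drops static shape information, so restore it explicitly
boxes_out.set_shape(boxes_in.get_shape())

with tf.Session() as sess:
    print(sess.run(boxes_out, {boxes_in: np.ones((2, 4), np.float32)}))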
Example #2
    def call(self, inputs):
        print('    Detection Layer : call() ', type(inputs), len(inputs))
        logt('rpn_proposals_roi ', inputs[0], verbose=self.verbose)
        logt('mrcnn_class.shape ', inputs[1], verbose=self.verbose)
        logt('mrcnn_bboxes.shape', inputs[2], verbose=self.verbose)
        logt('input_image_meta  ', inputs[3], verbose=self.verbose)

        def wrapper(rois, mrcnn_class, mrcnn_bbox, image_meta):
            from mrcnn.utils import parse_image_meta
            detections_batch = []
            # logt('detection wrapper - rpn_proposals_roi  ',  rois       , verbose = self.verbose)
            # logt('detection wrapper - mrcnn_class.shape  ',  mrcnn_class, verbose = self.verbose)
            # logt('detection wrapper - mrcnn_bboxes.shape ',  mrcnn_bbox , verbose = self.verbose)
            # logt('detection wrapper - image_meta         ',  image_meta , verbose = self.verbose)
            # process item per item in batch

            for b in range(self.config.BATCH_SIZE):
                _, _, window, _ = parse_image_meta(image_meta)

                detections = refine_detections(rois[b], mrcnn_class[b],
                                               mrcnn_bbox[b], window[b],
                                               self.config)
                # if self.verbose:
                # print('\n\n config.DETECTION_MAX_INSTANCES: ', self.config.DETECTION_MAX_INSTANCES)
                # print(' Detections shape:', detections.shape)
                # print(detections)

                # Pad with zeros if detections < DETECTION_MAX_INSTANCES
                gap = self.config.DETECTION_MAX_INSTANCES - detections.shape[0]
                assert gap >= 0
                if gap > 0:
                    detections = np.pad(detections, [(0, gap), (0, 0)],
                                        'constant',
                                        constant_values=0)

                detections_batch.append(detections)

            # Stack detections and cast to float32
            # TODO: track where float64 is introduced
            detections_batch = np.array(detections_batch).astype(np.float32)
            num_columns = detections_batch.shape[-1]

            # Reshape output
            # [batch, num_detections, (y1, x1, y2, x2, class_score)] in pixels
            return np.reshape(detections_batch, [
                self.config.BATCH_SIZE, self.config.DETECTION_MAX_INSTANCES,
                num_columns
            ])

        # Return wrapped function
        return tf.py_func(wrapper, inputs, tf.float32, name="detections")
Example #3
    def compute_output_shape(self, input_shape):
        # may need to change dimensions of first return from IMAGE_SHAPE to MAX_DIM
        input_num_classes = input_shape[1][1]
        input_detections = input_shape[1][2]
        input_columns = input_shape[1][3]
        logt('   FCNScoringLayer - Compute output shape() ',
             verbose=self.config.VERBOSE)
        logt('   input_num_classes : ',
             input_num_classes,
             verbose=self.config.VERBOSE)
        logt('   input_detections  : ',
             input_detections,
             verbose=self.config.VERBOSE)
        logt('   input_columns     : ',
             input_columns,
             verbose=self.config.VERBOSE)

        return [(None, input_num_classes, input_detections, input_columns)]
def fcn_heatmap_BCE_loss_graph(target_heatmap, pred_heatmap):
    '''
    Binary Cross Entropy Loss for the FCN heatmaps.

    Apply a per-pixel sigmoid and binary loss, similar to the Lmask loss calculation
    in MaskRCNN.
    Two approaches:
    1- Only calculate the loss for classes which have active GT bounding boxes
    2- Calculate it for all classes

    We will implement approach 1.


    target_heatmap:     [batch, height, width, num_classes].
                        A float32 tensor of values 0 or 1. Uses zero padding to fill the array.

    pred_heatmap:       [batch, height, width, num_classes]  float32 tensor
                        with values from 0 to 1.

    # active_class_ids:       [batch, num_classes]. Has a value of 1 for
                            # classes that are in the dataset of the image, and 0
                            # for classes that are not in the dataset. 
    '''
    print()
    print('-------------------------------' )
    print('>>> fcn_heatmap_BCE_loss_graph  ' )
    print('-------------------------------' )
    logt('    target_heatmap    :', target_heatmap) 
    logt('    pred_heatmap      :', pred_heatmap)
    # target_class_ids = tf.cast(target_class_ids, 'int64')
    
    # Find predictions of classes that are active (present in the GT heatmaps)  
    target_heatmap = tf.transpose(target_heatmap, [0,3,1,2])
    pred_heatmap   = tf.transpose(  pred_heatmap, [0,3,1,2])
    logt(' trgt_heatmap ', target_heatmap)
    logt(' pred_heatmap ', pred_heatmap  )

    tgt_hm_sum = tf.reduce_sum(target_heatmap, axis = [2,3])
    logt(' tgt_hm_sum ',tgt_hm_sum)

    class_idxs = tf.where(tgt_hm_sum > 0)
    logt(' class indices ', class_idxs)

    active_tgt_heatmaps  = tf.gather_nd(target_heatmap, class_idxs)
    active_pred_heatmaps = tf.gather_nd(pred_heatmap, class_idxs)
    logt('active_tgt_heatmaps  ',active_tgt_heatmaps)
    logt('active_pred_heatmaps ',active_pred_heatmaps)
    y_true = tf.reshape(active_tgt_heatmaps, (-1,))
    y_pred = tf.reshape(active_pred_heatmaps, (-1,))
    logt('y_true : ', y_true)
    logt('y_pred : ', y_pred)

    loss = KB.switch(tf.size(y_true) > 0,
                    KB.binary_crossentropy(target=y_true, output=y_pred),
                    tf.constant(0.0))
    logt('loss', loss)
    loss_mean = KB.mean(loss)
    logt('mean loss ', loss_mean)  
    loss_final = tf.reshape(loss_mean, [1, 1], name = 'fcn_BCE_loss')
    logt('loss (final) ', loss_final)
    # return loss    
    print('    loss              :', loss.get_shape()       , KB.int_shape(loss)       , 'KerasTensor: ', KB.is_keras_tensor(loss))
    print('    loss mean         :', loss_mean.get_shape()  , KB.int_shape(loss_mean)  , 'KerasTensor: ', KB.is_keras_tensor(loss_mean))
    print('    loss final        :', loss_final.get_shape() , KB.int_shape(loss_final) , 'KerasTensor: ', KB.is_keras_tensor(loss_final))
    
    return loss_final
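# ------------------------------------------------------------------------------------------
# A self-contained sketch of "approach 1" above: compute BCE only over (image, class)
# pairs whose GT heatmap is non-empty. TF 1.x; the random tensors and the [2, 8, 8, 4]
# shape are made-up stand-ins for real heatmaps, not part of the original code.
# ------------------------------------------------------------------------------------------
def bce_active_classes_sketch():
    import tensorflow as tf
    import keras.backend as KB

    tgt = tf.cast(tf.random_uniform([2, 8, 8, 4]) > 0.5, tf.float32)   # fake 0/1 GT heatmaps
    prd = tf.random_uniform([2, 8, 8, 4])                              # fake predictions in (0,1)

    tgt_t  = tf.transpose(tgt, [0, 3, 1, 2])                           # [batch, class, h, w]
    prd_t  = tf.transpose(prd, [0, 3, 1, 2])
    active = tf.where(tf.reduce_sum(tgt_t, axis=[2, 3]) > 0)           # (image, class) pairs with GT

    y_true = tf.reshape(tf.gather_nd(tgt_t, active), (-1,))
    y_pred = tf.reshape(tf.gather_nd(prd_t, active), (-1,))
    return KB.mean(KB.binary_crossentropy(target=y_true, output=y_pred))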
def fcn8_graph(feature_map, config, mode=None):
    '''Builds the computation graph of the FCN8 network.

    feature_map:            Contextual heatmap Tensor [batch, height, width, num_classes]

    Returns:
        fcn_hm:             FCN heatmap  [batch, height, width, num_classes]
        fcn_sm:             Softmax activation of the FCN heatmap
    '''
    print()
    print('---------------')
    print('>>> FCN8 Layer - mode:', mode)
    print('---------------')
    batch_size = config.BATCH_SIZE
    height, width = config.FCN_INPUT_SHAPE[0:2]
    num_classes = config.NUM_CLASSES
    rois_per_class = config.TRAIN_ROIS_PER_IMAGE
    weight_decay = config.WEIGHT_DECAY
    # In the original implementation, batch_momentum was used for the batch normalization layers of the ResNet
    # backbone. We are not using that backbone in FCN, therefore it is unused.
    # batch_momentum    = config.BATCH_MOMENTUM
    verbose = config.VERBOSE
    feature_map_shape = (width, height, num_classes)
    print('     feature map      :', feature_map.shape)
    print('     height :', height, 'width :', width, 'classes :', num_classes)
    print('     image_data_format: ', KB.image_data_format())
    print('     rois_per_class   : ', rois_per_class)

    if mode == 'training':
        KB.set_learning_phase(1)
    else:
        KB.set_learning_phase(0)
    print('     Set learning phase to :', KB.learning_phase())

    # feature_map = KL.Input(shape= feature_map_shape, name="input_fcn_feature_map")

    # TODO: Assert proper shape of input [batch_size, width, height, num_classes]
    # TODO: check if stride of 2 causes alignment issues if the featuremap is not even.

    # if batch_shape:
    # img_input = Input(batch_shape=batch_shape)
    # image_size = batch_shape[1:3]
    # else:
    # img_input = Input(shape=input_shape)
    # image_size = input_shape[0:2]

    ##-------------------------------------------------------------------------------------------------------
    ## Block 1    data_format='channels_last',
    ##-------------------------------------------------------------------------------------------------------
    x = KL.Conv2D(64, (3, 3),
                  activation='relu',
                  padding='same',
                  name='block1_conv1',
                  kernel_initializer='glorot_uniform',
                  bias_initializer='zeros')(feature_map)
    print('   Input feature map                   : ', feature_map.shape)
    logt('Input feature map ', feature_map, verbose=1)

    logt('FCN Block 11 ', x, verbose=verbose)

    x = KL.Conv2D(64, (3, 3),
                  activation='relu',
                  padding='same',
                  name='block1_conv2',
                  kernel_initializer='glorot_uniform',
                  bias_initializer='zeros')(x)
    logt('FCN Block 12 ', x, verbose=verbose)

    x = KL.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)
    logt('FCN Block 13 (Max pooling) ', x, verbose=verbose)

    ##-------------------------------------------------------------------------------------------------------
    ## Block 2
    ##-------------------------------------------------------------------------------------------------------
    x = KL.Conv2D(128, (3, 3),
                  activation='relu',
                  padding='same',
                  name='block2_conv1',
                  kernel_initializer='glorot_uniform',
                  bias_initializer='zeros')(x)
    logt('FCN Block 21  ', x, verbose=verbose)

    x = KL.Conv2D(128, (3, 3),
                  activation='relu',
                  padding='same',
                  name='block2_conv2',
                  kernel_initializer='glorot_uniform',
                  bias_initializer='zeros')(x)
    logt('FCN Block 22 ', x, verbose=verbose)

    x = KL.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)
    logt('FCN Block 23 (Max pooling) ', x, verbose=verbose)

    ##-------------------------------------------------------------------------------------------------------
    ## Block 3
    ##-------------------------------------------------------------------------------------------------------
    x = KL.Conv2D(256, (3, 3),
                  activation='relu',
                  padding='same',
                  name='block3_conv1',
                  kernel_initializer='glorot_uniform',
                  bias_initializer='zeros')(x)
    logt('FCN Block 31  ', x, verbose=verbose)

    x = KL.Conv2D(256, (3, 3),
                  activation='relu',
                  padding='same',
                  name='block3_conv2',
                  kernel_initializer='glorot_uniform',
                  bias_initializer='zeros')(x)
    logt('FCN Block 32 ', x, verbose=verbose)

    x = KL.Conv2D(256, (3, 3),
                  activation='relu',
                  padding='same',
                  name='block3_conv3',
                  kernel_initializer='glorot_uniform',
                  bias_initializer='zeros')(x)
    logt('FCN Block 33 ', x, verbose=verbose)

    Pool3 = KL.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)
    logt('FCN Block 34 (Max pooling) ', Pool3, verbose=verbose)

    ##-------------------------------------------------------------------------------------------------------
    ## Block 4
    ##-------------------------------------------------------------------------------------------------------
    x = KL.Conv2D(512, (3, 3),
                  activation='relu',
                  padding='same',
                  name='block4_conv1',
                  kernel_initializer='glorot_uniform',
                  bias_initializer='zeros')(Pool3)
    logt('FCN Block 41 ', x, verbose=verbose)

    x = KL.Conv2D(512, (3, 3),
                  activation='relu',
                  padding='same',
                  name='block4_conv2',
                  kernel_initializer='glorot_uniform',
                  bias_initializer='zeros')(x)
    logt('FCN Block 42 ', x, verbose=verbose)

    x = KL.Conv2D(512, (3, 3),
                  activation='relu',
                  padding='same',
                  name='block4_conv3',
                  kernel_initializer='glorot_uniform',
                  bias_initializer='zeros')(x)
    logt('FCN Block 43 ', x, verbose=verbose)

    Pool4 = KL.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)
    logt('FCN Block 44 (Max pooling) ', Pool4, verbose=verbose)

    ##-------------------------------------------------------------------------------------------------------
    ## Block 5
    ##-------------------------------------------------------------------------------------------------------
    x = KL.Conv2D(512, (3, 3),
                  activation='relu',
                  padding='same',
                  name='block5_conv1',
                  kernel_initializer='glorot_uniform',
                  bias_initializer='zeros')(Pool4)
    logt('FCN Block 51 ', x, verbose=verbose)

    x = KL.Conv2D(512, (3, 3),
                  activation='relu',
                  padding='same',
                  name='block5_conv2',
                  kernel_initializer='glorot_uniform',
                  bias_initializer='zeros')(x)
    logt('FCN Block 52 ', x, verbose=verbose)

    x = KL.Conv2D(512, (3, 3),
                  activation='relu',
                  padding='same',
                  name='block5_conv3',
                  kernel_initializer='glorot_uniform',
                  bias_initializer='zeros')(x)
    logt('FCN Block 53 ', x, verbose=verbose)

    x = KL.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x)
    logt('FCN Block 54 (Max pooling) ', x, verbose=verbose)

    ##-------------------------------------------------------------------------------------------------------
    ## FCN32 Specific Structure
    ##-------------------------------------------------------------------------------------------------------
    # Convolutional layers transferred from fully-connected layers
    # changed from 4096 to 2048 - reduction of weights from 42,752,644 to
    # changed from 2048 to 1024 - 11-05-2018
    # FC_SIZE = 2048
    FC_SIZE = 4096
    x = KL.Conv2D(FC_SIZE, (7, 7),
                  activation='relu',
                  padding='same',
                  name='fcn32_fc1',
                  kernel_initializer='glorot_uniform',
                  bias_initializer='zeros')(x)
    print()
    print('   --- FCN32 ----------------------------')
    logt(' FCN fully connected 1 (fc1) ', x, verbose=verbose)

    x = KL.Dropout(0.5)(x)
    x = KL.Conv2D(FC_SIZE, (1, 1),
                  activation='relu',
                  padding='same',
                  name='fcn32_fc2',
                  kernel_initializer='glorot_uniform',
                  bias_initializer='zeros')(x)
    logt('FCN fully connected 2 (fc2) ', x, verbose=verbose)

    x = KL.Dropout(0.5)(x)
    # Classifying layer
    x = KL.Conv2D(num_classes, (1, 1),
                  activation='linear',
                  padding='valid',
                  strides=(1, 1),
                  name='fcn32_deconv2D',
                  kernel_initializer='he_normal',
                  bias_initializer='zeros')(x)
    logt('FCN conv2d (fcn32_deconv2D)  ', x, verbose=verbose)

    ##-------------------------------------------------------------------------------------------------------
    ## FCN16 Specific Structure
    ##-------------------------------------------------------------------------------------------------------
    # Score Pool4 - Reduce Pool4 filters from 512 to num_classes (81)
    scorePool4 = KL.Conv2D(num_classes, (1, 1),
                           activation='relu',
                           padding='valid',
                           name='fcn16_score_pool4',
                           kernel_initializer='glorot_uniform',
                           bias_initializer='zeros')(Pool4)
    print()
    print('   --- FCN16 ----------------------------')
    logt('FCN scorePool4 (Conv2D(Pool4)) ', scorePool4, verbose=verbose)

    # 2x Upsampling of fcn32_deconv2D to generate Score2 (padding was originally "valid")
    x = KL.Deconvolution2D(num_classes,
                           kernel_size=(4, 4),
                           activation=None,
                           padding='valid',
                           name='fcn16_score2',
                           strides=(2, 2))(x)
    logt('FCN 2x Upsampling (Deconvolution2D(fcn32_classify))  ',
         x,
         verbose=verbose)

    # Crop to appropriate shape if required
    score2_c = KL.Cropping2D(cropping=((1, 1), (1, 1)),
                             name='fcn16_crop_score2')(x)
    logt('FCN 2x Upsampling/Cropped (Cropped2D(score2)) ',
         score2_c,
         verbose=verbose)

    # Sum Score2, scorePool4
    x = KL.Add(name='fcn16_fuse_pool4')([score2_c, scorePool4])
    logt('FCN Add Score2,scorePool4 Add(score2_c, scorePool4)  ',
         x,
         verbose=verbose)

    # 2x Upsampling  (padding was originally "valid", I changed it to "same" )
    x = KL.Deconvolution2D(num_classes,
                           kernel_size=(4, 4),
                           activation=None,
                           padding='same',
                           name='fcn16_upscore_pool4',
                           kernel_initializer='glorot_uniform',
                           bias_initializer='zeros',
                           strides=(2, 2))(x)

    logt('FCN upscore_pool4 (Deconv(fuse_Pool4)) ', x, verbose=verbose)

    ##-------------------------------------------------------------------------------------------------------
    ## FCN8 Specific Structure
    ##-------------------------------------------------------------------------------------------------------
    # Score Pool3 - Reduce Pool3 filters from 256 to num_classes (81)
    scorePool3 = KL.Conv2D(num_classes, (1, 1),
                           activation='relu',
                           padding='valid',
                           name='fcn8_score_pool3',
                           kernel_initializer='glorot_uniform',
                           bias_initializer='zeros')(Pool3)
    print()
    print('   --- FCN8 ----------------------------')
    logt('FCN scorePool3 (Conv2D(Pool3))  ', scorePool3, verbose=verbose)

    upscore_pool4_c = KL.Cropping2D(cropping=((0, 0), (0, 0)),
                                    name='fcn8_crop_pool4')(x)
    logt('FCN crop_pool4 (Cropping2D(upscore_pool4)) ',
         upscore_pool4_c,
         verbose=verbose)

    # Sum  upscore_pool4_c, scorePool3
    x = KL.Add(name='fcn8_fuse_pool3')([upscore_pool4_c, scorePool3])
    logt('FCN Add upscore_pool4,scorePool3', x, verbose=verbose)

    print()

    ##-------------------------------------------------------------------------------------------------------
    ## fcn_heatmap
    ##-------------------------------------------------------------------------------------------------------
    # 8x Upsampling  (padding was originally "valid", I changed it to "same" )
    fcn_hm = KL.Deconvolution2D(num_classes,
                                kernel_size=(16, 16),
                                activation=None,
                                padding='same',
                                name='fcn8_heatmap',
                                kernel_initializer='glorot_uniform',
                                bias_initializer='zeros',
                                strides=(8, 8))(x)
    # fcn_hm = tf.identity(fcn_hm)
    fcn_hm.set_shape(feature_map.shape)
    logt('FCN fcn8_classify/heatmap  (Deconv(fuse_pool3)) ',
         fcn_hm,
         verbose=verbose)
    fcn_hm = KL.Lambda(lambda z: tf.identity(z, name='fcn_hm'),
                       name='fcn_heatmap_lambda')(fcn_hm)
    logt('fcn_hm (final)', fcn_hm, verbose=verbose)
    print()

    # fcn_classify_shape = KB.int_shape(fcn_hm)
    # h_factor = height / fcn_classify_shape[1]
    # w_factor = width  / fcn_classify_shape[2]
    # print('   fcn_classify_shape:',fcn_classify_shape,'   h_factor : ', h_factor, '  w_factor : ', w_factor)

    # x = BilinearUpSampling2D(size=(h_factor, w_factor), name='fcn_bilinear')(x)
    # print('   FCN Bilinear upsampling layer  shape is : ' , KB.int_shape(x), ' Keras tensor ', KB.is_keras_tensor(x) )

    ##-------------------------------------------------------------------------------------------------------
    ## fcn_softmax
    ##-------------------------------------------------------------------------------------------------------
    fcn_sm = KL.Activation("softmax", name="fcn8_softmax")(fcn_hm)
    logt('fcn8_softmax  ', fcn_sm, verbose=verbose)
    fcn_sm = KL.Lambda(lambda z: tf.identity(z, name='fcn_sm'),
                       name='fcn_softmax_lambda')(fcn_sm)
    logt('fcn_sm (final)', fcn_sm, verbose=verbose)
    print()

    #---------------------------------------------------------------------------------------------
    # heatmap L2 normalization
    # Normalization using the  `gauss_sum` (batchsize , num_classes, height, width)
    # 17-05-2018 (New method, replaced the previous method that used the transposed gauss sum)
    # 17-05-2018 Replaced with normalization along the CLASS axis
    #---------------------------------------------------------------------------------------------
    # print('\n    L2 normalization ------------------------------------------------------')
    # fcn_hm_L2norm = KL.Lambda(lambda z: tf.nn.l2_normalize(z, axis = 3, name = 'fcn_heatmap_L2norm'),\
    # name = 'fcn_heatmap_L2norm')(x)
    # print('\n    normalization ------------------------------------------------------')
    # fcn_hm_norm   = KL.Lambda(normalize, name="fcn_heatmap_norm") (x)

    return fcn_hm, fcn_sm
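# ------------------------------------------------------------------------------------------
# Why the Cropping2D steps above exist: Conv2DTranspose / Deconvolution2D grows the
# spatial size by a fixed rule, so a 'valid' deconv can overshoot the skip branch it is
# fused with. A sketch of that arithmetic (standard Keras output-size rule; the 16/4/2
# numbers are illustrative only):
# ------------------------------------------------------------------------------------------
def deconv_out_size(in_size, kernel, stride, padding):
    # output-size rule for Conv2DTranspose / Deconvolution2D
    if padding == 'same':
        return in_size * stride
    return (in_size - 1) * stride + kernel      # 'valid'

# e.g. a 4x4 'valid' deconv with stride 2 on a 16-wide map yields 34, which is why a
# Cropping2D(((1, 1), (1, 1))) is needed to bring it back to a 32-wide skip branch:
# deconv_out_size(16, 4, 2, 'valid') -> 34 ; deconv_out_size(16, 4, 2, 'same') -> 32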
def fcn_scoring_graph(input, config, mode):
    in_heatmap, pr_scores = input
    detections_per_image = pr_scores.shape[2]
    rois_per_image = KB.int_shape(pr_scores)[2]
    img_h, img_w = config.IMAGE_SHAPE[:2]
    batch_size = config.BATCH_SIZE
    num_classes = config.NUM_CLASSES
    heatmap_scale = config.HEATMAP_SCALE_FACTOR
    class_column = 4
    score_column = 5
    if mode == 'training':
        sequence_column = 6
        norm_score_column = 7
    else:
        dt_type_column = 6
        sequence_column = 7
        norm_score_column = 8

    print('\n ')
    print('----------------------')
    print('>>> FCN Scoring Layer - mode:', mode)
    print('----------------------')
    logt('in_heatmap.shape  ', in_heatmap)
    logt('pr_hm_scores.shape', pr_scores)
    # rois per image is determined by size of input tensor
    #   detection mode:   config.TRAIN_ROIS_PER_IMAGE
    #   ground_truth  :   config.DETECTION_MAX_INSTANCES

    print('    detections_per_image: ', detections_per_image,
          'pr_scores shape', pr_scores.shape)
    print('    rois_per_image      : ', rois_per_image)
    print('    config.DETECTION_MAX_INSTANCES   : ',
          config.DETECTION_MAX_INSTANCES)
    print('    config.DETECTION_PER_CLASS       : ',
          config.DETECTION_PER_CLASS)
    print('    sequence_column                  : ', sequence_column)
    print('    norm_score_column                : ', norm_score_column)

    ##---------------------------------------------------------------------------------------------
    ## Stack non_zero bboxes from PR_SCORES into pt2_dense
    ##---------------------------------------------------------------------------------------------
    # pt2_ind shape  : [?, 3] : [ {image_index, class_index , roi row_index }]
    # pt2_dense shape: [?, 11] :
    #    pt2_dense[0:4]  roi coordinates
    #    pt2_dense[4]    is class id
    #    pt2_dense[5]    is score from mrcnn
    #    pt2_dense[6]    is bbox sequence id
    #    pt2_dense[7]    is normalized score (per class)
    #-----------------------------------------------------------------------------
    pt2_sum = tf.reduce_sum(tf.abs(pr_scores[:, :, :, :class_column]), axis=-1)
    pt2_ind = tf.where(pt2_sum > 0)
    pt2_dense = tf.gather_nd(pr_scores, pt2_ind)
    logt('in_heatmap       ', in_heatmap)
    logt('pr_scores.shape  ', pr_scores)
    logt('pt2_sum shape    ', pt2_sum)
    logt('pt2_ind shape    ', pt2_ind)
    logt('pt2_dense shape  ', pt2_dense)

    ##---------------------------------------------------------------------------------------------
    ##  Build mean and covariance tensors for bounding boxes
    ##---------------------------------------------------------------------------------------------
    # bboxes_scaled = tf.to_int32(tf.round(pt2_dense[...,0:4])) / heatmap_scale
    bboxes_scaled = pt2_dense[..., 0:class_column] / heatmap_scale
    width = bboxes_scaled[:, 3] - bboxes_scaled[:, 1]  # x2 - x1
    height = bboxes_scaled[:, 2] - bboxes_scaled[:, 0]
    cx = bboxes_scaled[:, 1] + (width / 2.0)
    cy = bboxes_scaled[:, 0] + (height / 2.0)
    # means  = tf.stack((cx,cy),axis = -1)
    covar = tf.stack((width * 0.5, height * 0.5), axis=-1)
    covar = tf.sqrt(covar)

    ##---------------------------------------------------------------------------------------------
    ##  build indices and extract heatmaps corresponding to each bounding boxes' class id
    ##---------------------------------------------------------------------------------------------
    hm_indices = tf.cast(pt2_ind[:, :2], dtype=tf.int32)
    logt('hm_indices  ', hm_indices)
    pt2_heatmaps = tf.transpose(in_heatmap, [0, 3, 1, 2])
    logt('pt2_heatmaps', pt2_heatmaps)
    pt2_heatmaps = tf.gather_nd(pt2_heatmaps, hm_indices)
    logt('pt2_heatmaps', pt2_heatmaps)

    ##--------------------------------------------------------------------------------------------
    ## (0) Generate scores using prob_grid and pt2_dense
    ##--------------------------------------------------------------------------------------------
    old_style_scores = tf.map_fn(
        build_hm_score_v2,
        [pt2_heatmaps, bboxes_scaled, pt2_dense[:, norm_score_column]],
        dtype=tf.float32,
        swap_memory=True)
    logt('old_style_scores', old_style_scores)

    # old_style_scores = tf.scatter_nd(pt2_ind, old_style_scores,
    # [batch_size, num_classes, rois_per_image, KB.int_shape(old_style_scores)[-1]],
    # name = 'scores_scattered')
    # print('    old_style_scores        :',  old_style_scores.get_shape(), KB.int_shape(old_style_scores))

    ##---------------------------------------------------------------------------------------------
    ## generate score based on gaussian using bounding box masks
    ##---------------------------------------------------------------------------------------------
    alt_scores_1 = tf.map_fn(build_hm_score_v3, [pt2_heatmaps, cy, cx, covar],
                             dtype=tf.float32)
    logt('alt_scores_1 ', alt_scores_1)

    ##---------------------------------------------------------------------------------------------
    ##  Scatter back to per-class tensor /  normalize by class
    ##---------------------------------------------------------------------------------------------
    alt_scores_1_norm = tf.scatter_nd(
        pt2_ind,
        alt_scores_1, [
            batch_size, num_classes, detections_per_image,
            KB.int_shape(alt_scores_1)[-1]
        ],
        name='alt_scores_1_norm')
    logt('alt_scores_1_scattered', alt_scores_1_norm)
    alt_scores_1_norm = normalize_scores(alt_scores_1_norm)
    logt('alt_scores_1_norm(by_class)', alt_scores_1_norm)
    alt_scores_1_norm = tf.gather_nd(alt_scores_1_norm, pt2_ind)
    logt('alt_scores_1_norm(by_image)', alt_scores_1_norm)

    ##---------------------------------------------------------------------------------------------
    ## Normalize the input heatmap (per class) to calculate alt_scores_2
    ##--------------------------------------------------------------------------------------------
    print(
        '\n    Normalize heatmap within each class !-------------------------------------'
    )
    in_heatmap_norm = tf.transpose(in_heatmap, [0, 3, 1, 2])

    print('    in_heatmap_norm : ', in_heatmap_norm.get_shape(),
          'Keras tensor ', KB.is_keras_tensor(in_heatmap_norm))
    ## normalize in class
    normalizer = tf.reduce_max(in_heatmap_norm, axis=[-2, -1], keepdims=True)
    normalizer = tf.where(normalizer < 1.0e-15, tf.ones_like(normalizer),
                          normalizer)
    in_heatmap_norm = in_heatmap_norm / normalizer
    # gauss_heatmap_sum_normalized = gauss_heatmap_sum / normalizer
    print('    normalizer shape   : ', normalizer.shape)
    print('    normalized heatmap : ', in_heatmap_norm.shape, ' Keras tensor ',
          KB.is_keras_tensor(in_heatmap_norm))

    ##---------------------------------------------------------------------------------------------
    ##  build indices and extract heatmaps corresponding to each bounding boxes' class id
    ##  build alternative scores based on the normalized/scaled clipped heatmap
    ##---------------------------------------------------------------------------------------------
    hm_indices = tf.cast(pt2_ind[:, :2], dtype=tf.int32)
    logt('hm_indices shape', hm_indices)

    pt2_heatmaps = tf.gather_nd(in_heatmap_norm, hm_indices)
    logt('pt2_heatmaps', pt2_heatmaps)

    alt_scores_2 = tf.map_fn(build_hm_score_v3, [pt2_heatmaps, cy, cx, covar],
                             dtype=tf.float32)
    logt('alt_scores_2', alt_scores_2)

    alt_scores_2_norm = tf.scatter_nd(
        pt2_ind,
        alt_scores_2, [
            batch_size, num_classes, rois_per_image,
            KB.int_shape(alt_scores_2)[-1]
        ],
        name='alt_scores_2')
    logt('alt_scores_2(scattered)', alt_scores_2_norm)

    alt_scores_2_norm = normalize_scores(alt_scores_2_norm)
    logt('alt_scores_2_norm(by_class)', alt_scores_2_norm)

    alt_scores_2_norm = tf.gather_nd(alt_scores_2_norm, pt2_ind)
    logt('alt_scores_2_norm(by_image)', alt_scores_2_norm)
    ####################################################################################################################

    ##--------------------------------------------------------------------------------------------
    ##  Append alt_scores_1, alt_scores_1_norm to yield fcn_scores_dense
    ##--------------------------------------------------------------------------------------------
    fcn_scores_dense = tf.concat([
        pt2_dense[:, :norm_score_column + 1], old_style_scores, alt_scores_1,
        alt_scores_1_norm, alt_scores_2, alt_scores_2_norm
    ],
                                 axis=-1,
                                 name='fcn_scores_dense')
    logt('fcn_scores_dense    ', fcn_scores_dense)

    ##---------------------------------------------------------------------------------------------
    ##  Scatter back to per-image tensor
    ##---------------------------------------------------------------------------------------------
    seq_ids = tf.to_int32(rois_per_image - pt2_dense[:, sequence_column])
    scatter_ind = tf.stack([hm_indices[:, 0], seq_ids],
                           axis=-1,
                           name='scatter_ind')
    fcn_scores_by_class = tf.scatter_nd(
        pt2_ind,
        fcn_scores_dense, [
            batch_size, num_classes, detections_per_image,
            fcn_scores_dense.shape[-1]
        ],
        name='fcn_hm_scores')
    # fcn_scores_by_image = tf.scatter_nd(scatter_ind, fcn_scores_dense,
    # [batch_size, detections_per_image, fcn_scores_dense.shape[-1]], name='fcn_hm_scores_by_image')
    logt('seq_ids             ', seq_ids)
    logt('scatter_ind         ', scatter_ind)
    logt('fcn_scores_by_class ', fcn_scores_by_class)
    # logt('fcn_scores_by_image ', fcn_scores_by_image)
    logt('complete')

    return fcn_scores_by_class
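# ------------------------------------------------------------------------------------------
# fcn_scoring_graph relies on a dense/sparse round trip: tf.where finds the indices of
# non-zero rows, tf.gather_nd flattens them for per-row work, and tf.scatter_nd puts the
# results back into the padded per-class layout. A minimal sketch of that round trip
# (TF 1.x; the toy constants are not part of the original code):
# ------------------------------------------------------------------------------------------
def dense_sparse_roundtrip_sketch():
    import tensorflow as tf

    x = tf.constant([[[1., 2.], [0., 0.]],
                     [[0., 0.], [3., 4.]]])                       # [batch=2, rois=2, cols=2], zero padded
    ind       = tf.where(tf.reduce_sum(tf.abs(x), axis=-1) > 0)   # indices of non-zero rows
    dense     = tf.gather_nd(x, ind)                              # [?, 2] flat rows for per-row ops
    processed = dense * 10.0                                      # stand-in for the real scoring
    # scatter back into the original zero-padded layout
    return tf.scatter_nd(ind, processed, tf.shape(x, out_type=tf.int64))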
Example #7
def fcn32_graph(feature_map, config, mode=None):
    '''Builds the computation graph of the FCN32 network.

    feature_map:            Contextual heatmap Tensor [batch, height, width, num_classes]

    Returns:
        fcn_hm:             FCN heatmap  [batch, height, width, num_classes]
        fcn_sm:             Softmax activation of the FCN heatmap
    '''
    print()
    print('---------------')
    print('>>> FCN32 Layer - mode:', mode)
    print('---------------')
    batch_size = config.BATCH_SIZE
    height, width = config.FCN_INPUT_SHAPE[0:2]
    num_classes = config.NUM_CLASSES
    rois_per_class = config.TRAIN_ROIS_PER_IMAGE
    weight_decay = config.WEIGHT_DECAY
    batch_momentum = config.BATCH_MOMENTUM
    verbose = config.VERBOSE
    feature_map_shape = (width, height, num_classes)
    print('     feature map      :', feature_map.shape)
    print('     height :', height, 'width :', width, 'classes :', num_classes)
    print('     image_data_format: ', KB.image_data_format())
    print('     rois_per_class   : ', rois_per_class)

    # feature_map = KL.Input(shape= feature_map_shape, name="input_fcn_feature_map")
    # TODO: Assert proper shape of input [batch_size, width, height, num_classes]

    # TODO: check if stride of 2 causes alignment issues if the featuremap is not even.

    # if batch_shape:
    # img_input = Input(batch_shape=batch_shape)
    # image_size = batch_shape[1:3]
    # else:
    # img_input = Input(shape=input_shape)
    # image_size = input_shape[0:2]

    ## , kernel_regularizer=l2(weight_decay)

    # Block 1    data_format='channels_last',

    x = KL.Conv2D(64, (3, 3),
                  activation='relu',
                  padding='same',
                  name='block1_conv1',
                  kernel_initializer='glorot_uniform',
                  bias_initializer='zeros')(feature_map)
    print('   FCN Block 11 shape is : ', x.get_shape())

    x = KL.Conv2D(64, (3, 3),
                  activation='relu',
                  padding='same',
                  name='block1_conv2',
                  kernel_initializer='glorot_uniform',
                  bias_initializer='zeros')(x)
    print('   FCN Block 12 shape is : ', x.get_shape())

    x = KL.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)
    print('   FCN Block 13 shape is : ', x.get_shape())
    x0 = x

    # Block 2
    x = KL.Conv2D(128, (3, 3),
                  activation='relu',
                  padding='same',
                  name='block2_conv1',
                  kernel_initializer='glorot_uniform',
                  bias_initializer='zeros')(x)
    print('   FCN Block 21 shape is : ', x.get_shape())

    x = KL.Conv2D(128, (3, 3),
                  activation='relu',
                  padding='same',
                  name='block2_conv2',
                  kernel_initializer='glorot_uniform',
                  bias_initializer='zeros')(x)
    print('   FCN Block 22 shape is : ', x.get_shape())

    x = KL.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)
    print('   FCN Block 23 (Max pooling) shape is : ', x.get_shape())
    x1 = x

    # Block 3
    x = KL.Conv2D(256, (3, 3),
                  activation='relu',
                  padding='same',
                  name='block3_conv1',
                  kernel_initializer='glorot_uniform',
                  bias_initializer='zeros')(x)
    print('   FCN Block 31 shape is : ', x.get_shape())

    x = KL.Conv2D(256, (3, 3),
                  activation='relu',
                  padding='same',
                  name='block3_conv2',
                  kernel_initializer='glorot_uniform',
                  bias_initializer='zeros')(x)
    print('   FCN Block 32 shape is : ', x.get_shape())

    x = KL.Conv2D(256, (3, 3),
                  activation='relu',
                  padding='same',
                  name='block3_conv3',
                  kernel_initializer='glorot_uniform',
                  bias_initializer='zeros')(x)
    print('   FCN Block 33 shape is : ', x.get_shape())

    x = KL.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)
    print('   FCN Block 34 (Max pooling) shape is : ', x.get_shape())

    # Block 4
    x = KL.Conv2D(512, (3, 3),
                  activation='relu',
                  padding='same',
                  name='block4_conv1',
                  kernel_initializer='glorot_uniform',
                  bias_initializer='zeros')(x)
    print('   FCN Block 41 shape is : ', x.get_shape())
    x = KL.Conv2D(512, (3, 3),
                  activation='relu',
                  padding='same',
                  name='block4_conv2',
                  kernel_initializer='glorot_uniform',
                  bias_initializer='zeros')(x)
    print('   FCN Block 42 shape is : ', x.get_shape())
    x = KL.Conv2D(512, (3, 3),
                  activation='relu',
                  padding='same',
                  name='block4_conv3',
                  kernel_initializer='glorot_uniform',
                  bias_initializer='zeros')(x)
    print('   FCN Block 43 shape is : ', x.get_shape())
    x = KL.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)
    print('   FCN Block 44 (Max pooling) shape is : ', x.get_shape())

    # Block 5
    x = KL.Conv2D(512, (3, 3),
                  activation='relu',
                  padding='same',
                  name='block5_conv1',
                  kernel_initializer='glorot_uniform',
                  bias_initializer='zeros')(x)
    print('   FCN Block 51 shape is : ', x.get_shape())
    x = KL.Conv2D(512, (3, 3),
                  activation='relu',
                  padding='same',
                  name='block5_conv2',
                  kernel_initializer='glorot_uniform',
                  bias_initializer='zeros')(x)
    print('   FCN Block 52 shape is : ', x.get_shape())
    x = KL.Conv2D(512, (3, 3),
                  activation='relu',
                  padding='same',
                  name='block5_conv3',
                  kernel_initializer='glorot_uniform',
                  bias_initializer='zeros')(x)
    print('   FCN Block 53 shape is : ', x.get_shape())
    x = KL.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x)
    print('   FCN Block 54 (Max pooling) shape is : ', x.get_shape())

    ##-------------------------------------------------------------------------------------------------------
    ## FCN32 Specific Structure
    ##-------------------------------------------------------------------------------------------------------
    # Convolutional layers transferred from fully-connected layers
    # changed from 4096 to 2048 - reduction of weights from 42,752,644 to
    # changed from 2048 to 1024 - 11-05-2018

    # FC_SIZE = 2048
    FC_SIZE = 4096
    x = KL.Conv2D(FC_SIZE, (7, 7),
                  activation='relu',
                  padding='same',
                  name="fc1",
                  kernel_initializer='glorot_uniform',
                  bias_initializer='zeros')(x)

    print()
    print('   --- FCN32 ----------------------------')
    print('   FCN fully connected 1 (fcn_fc1) shape is : ', KB.int_shape(x))
    x = KL.Dropout(0.5)(x)

    #fc2
    x = KL.Conv2D(FC_SIZE, (1, 1),
                  activation='relu',
                  padding='same',
                  name="fc2",
                  kernel_initializer='glorot_uniform',
                  bias_initializer='zeros')(x)

    print('   FCN fully connected 2 (fcn_fc2) shape is : ', x.get_shape())
    x = KL.Dropout(0.5)(x)

    #classifying layer
    x = KL.Conv2D(num_classes, (1, 1),
                  kernel_initializer='he_normal',
                  bias_initializer='zeros',
                  activation='linear',
                  padding='valid',
                  strides=(1, 1),
                  name="fcn_classify")(x)

    print('   FCN final conv2d (fcn_classify) shape is : ', x.get_shape(),
          ' keras_tensor ', KB.is_keras_tensor(x))

    fcn_classify_shape = KB.int_shape(x)
    h_factor = height / fcn_classify_shape[1]
    w_factor = width / fcn_classify_shape[2]
    print('   h_factor : ', h_factor, 'w_factor : ', w_factor)

    # x = BilinearUpSampling2D(size=(h_factor, w_factor), name='fcn_bilinear')(x)
    # print('   FCN Bilinear upsampling layer  shape is : ' , x.get_shape(), ' Keras tensor ', KB.is_keras_tensor(x) )
    ##-------------------------------------------------------------------------------------------------------
    ## fcn_heatmap
    ##-------------------------------------------------------------------------------------------------------
    # 32x Upsampling  (padding was originally "valid", I changed it to "same" )
    fcn_hm = KL.Deconvolution2D(num_classes,
                                kernel_size=(16, 16),
                                strides=(32, 32),
                                kernel_initializer='glorot_uniform',
                                bias_initializer='zeros',
                                padding='same',
                                activation=None,
                                name="fcn8_heatmap")(x)

    # fcn_hm = tf.identity(fcn_hm)
    fcn_hm.set_shape(feature_map.shape)
    logt('FCN fcn32 heatmap  (Deconv(fcn_classify)) ',
         fcn_hm,
         verbose=verbose)
    fcn_hm = KL.Lambda(lambda z: tf.identity(z, name='fcn_hm'),
                       name='fcn_heatmap_lambda')(fcn_hm)
    logt('fcn_hm (final)', fcn_hm, verbose=verbose)
    print()

    ##-------------------------------------------------------------------------------------------------------
    ## fcn_SOFTMAX
    ##-------------------------------------------------------------------------------------------------------
    fcn_sm = KL.Activation("softmax", name="fcn8_softmax")(fcn_hm)
    logt('fcn8_softmax  ', fcn_sm, verbose=verbose)
    fcn_sm = KL.Lambda(lambda z: tf.identity(z, name='fcn_sm'),
                       name='fcn_softmax_lambda')(fcn_sm)
    logt('fcn_sm (final)', fcn_sm, verbose=verbose)
    print()

    #---------------------------------------------------------------------------------------------
    # heatmap L2 normalization
    # Normalization using the  `gauss_sum` (batchsize , num_classes, height, width)
    # 17-05-2018 (New method, replaced the previous method that used the transposed gauss sum)
    # 17-05-2018 Replaced with normalization along the CLASS axis
    #---------------------------------------------------------------------------------------------
    # print('\n    L2 normalization ------------------------------------------------------')
    # fcn_hm_L2norm = KL.Lambda(lambda z: tf.nn.l2_normalize(z, axis = 3, name = 'fcn_heatmap_L2norm'),\
    # name = 'fcn_heatmap_L2norm')(x)
    # print('\n    normalization ------------------------------------------------------')
    # fcn_hm_norm   = KL.Lambda(normalize, name="fcn_heatmap_norm") (x)

    print('    fcn_heatmap       : ', fcn_hm.shape, ' Keras tensor ',
          KB.is_keras_tensor(fcn_hm))
    # print('    fcn_heatmap_norm  : ', fcn_hm_norm.shape   ,' Keras tensor ', KB.is_keras_tensor(fcn_hm_norm) )
    # print('    fcn_heatmap_L2norm: ', fcn_hm_L2norm.shape ,' Keras tensor ', KB.is_keras_tensor(fcn_hm_L2norm) )

    return fcn_hm, fcn_sm
    def call(self, inputs):
        verbose = self.config.VERBOSE

        tgt_class_ids, tgt_bboxes = inputs
        logt('  > CHMLayerTgt Call()   :', inputs, verbose=verbose)
        logt('    tgt_class_ids.shape  :', tgt_class_ids, verbose=verbose)
        logt('    tgt_bboxes.shape     :', tgt_bboxes, verbose=verbose)

        gt_tensor = build_gt_tensor(tgt_class_ids, tgt_bboxes, self.config)
        gt_hm, gt_hm_scores = build_gt_heatmap(gt_tensor,
                                               self.config,
                                               names=['gt_heatmap'])
        # gt_cls_cnt   = KL.Lambda(lambda x: tf.count_nonzero(x[:,:,:,-1],axis = -1), name = 'gt_cls_count')(gt_tensor)

        logt(' ', verbose=verbose)
        logt('gt_heatmap        ', gt_hm, verbose=verbose)
        logt('gt_heatmap_scores ', gt_hm_scores, verbose=verbose)
        logt('complete', verbose=verbose)

        return [gt_hm, gt_hm_scores]
Example #9
def fcn_heatmap_BCE_loss_graph_2(target_heatmap, pred_heatmap, config):
    '''
    Binary Cross Entropy Loss for the FCN heatmaps - calculated for ONE CLASS ONLY!

    Apply a per-pixel sigmoid and binary loss, similar to the Lmask loss calculation
    in MaskRCNN.
    Two approaches:
    1- Only calculate the loss for classes which have active GT bounding boxes
    2- Calculate it for all classes

    We will implement approach 1.


    target_heatmap:     [batch, height, width, num_classes].
                        A float32 tensor of values 0 or 1. Uses zero padding to fill the array.

    pred_heatmap:       [batch, height, width, num_classes]  float32 tensor
                        with values from 0 to 1.

    # active_class_ids:       [batch, num_classes]. Has a value of 1 for
                            # classes that are in the dataset of the image, and 0
                            # for classes that are not in the dataset. 
    '''
    print()
    print('--------------------------------------------------------')
    print('>>> fcn_heatmap_BCE_loss_graph_2 -- On ONE CLASS ONLY!  ')
    print('--------------------------------------------------------')
    logt('    target_heatmap    :', target_heatmap)
    logt('    pred_heatmap      :', pred_heatmap)
    error_cls = config.FCN_BCE_LOSS_CLASS
    logt('    fcn_bce_loss_class:', error_cls)

    # Transpose to Image, Class, Height, Width
    target_heatmap = tf.transpose(target_heatmap, [0, 3, 1, 2])
    pred_heatmap = tf.transpose(pred_heatmap, [0, 3, 1, 2])
    logt(' trgt_heatmap ', target_heatmap)
    logt(' pred_heatmap ', pred_heatmap)

    # LOSS 2 : Loss on one selected class only (FCN_BCE_LOSS_CLASS, e.g. the SUN class)
    loss2 = KB.binary_crossentropy(
        target=target_heatmap[:, error_cls:error_cls + 1],
        output=pred_heatmap[:, error_cls:error_cls + 1])
    logt('loss2      ', loss2)
    loss2_mean = KB.mean(loss2)
    logt('loss2_mean ', loss2_mean)
    loss2_final = tf.reshape(loss2_mean, [1, 1], name='fcn_BCE_loss')
    logt('loss2_final', loss2_final)

    return loss2_final
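# ------------------------------------------------------------------------------------------
# The error_cls:error_cls + 1 slice above keeps the class axis, so the tensors stay 4-D
# and KB.binary_crossentropy sees matching shapes. A tiny illustration of the difference
# (NumPy indexing semantics match TF slicing; the shapes are hypothetical):
#   hm = np.zeros((2, 4, 64, 64))     # [batch, class, h, w]
#   hm[:, 2].shape                    # -> (2, 64, 64)     axis dropped
#   hm[:, 2:3].shape                  # -> (2, 1, 64, 64)  axis kept
# ------------------------------------------------------------------------------------------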
def build_gt_heatmap(in_tensor, config, names=None):
    verbose = config.VERBOSE
    num_detections = config.DETECTION_MAX_INSTANCES
    img_h, img_w = config.IMAGE_SHAPE[:2]
    batch_size = config.BATCH_SIZE
    num_classes = config.NUM_CLASSES
    heatmap_scale = config.HEATMAP_SCALE_FACTOR
    grid_h, grid_w = config.IMAGE_SHAPE[:2] // heatmap_scale
    # rois per image is determined by size of input tensor
    #   detection mode:   config.TRAIN_ROIS_PER_IMAGE
    #   ground_truth  :   config.DETECTION_MAX_INSTANCES
    #   strt_cls        = 0 if rois_per_image == 32 else 1
    # rois_per_image  = config.DETECTION_PER_CLASS
    rois_per_image = (in_tensor.shape)[2]

    if verbose:
        print('\n ')
        print('  > build_gt_heatmap() for ', names)
        print('    in_tensor shape        : ', in_tensor.shape)
        print('    num bboxes per class   : ', rois_per_image)
        print('    heatmap scale        : ', heatmap_scale, 'Dimensions:  w:',
              grid_w, ' h:', grid_h)

    ##-----------------------------------------------------------------------------
    ## Stack non_zero bboxes from in_tensor into pt2_dense
    ##-----------------------------------------------------------------------------
    # pt2_ind shape is [?, 3].
    #    pt2_ind[0] corresponds to image_index
    #    pt2_ind[1] corresponds to class_index
    #    pt2_ind[2] corresponds to roi row_index
    # pt2_dense shape is [?, 8]
    #    pt2_dense[0:4]  roi coordinates
    #    pt2_dense[4]    is class id
    #    pt2_dense[5]    is score from mrcnn
    #    pt2_dense[6]    is bbox sequence id
    #    pt2_dense[7]    is normalized score (per class)
    #-----------------------------------------------------------------------------
    pt2_sum = tf.reduce_sum(tf.abs(in_tensor[:, :, :, :4]), axis=-1)
    pt2_ind = tf.where(pt2_sum > 0)
    pt2_dense = tf.gather_nd(in_tensor, pt2_ind)

    logt('pt2_sum   ', pt2_sum, verbose=verbose)
    logt('pt2_ind   ', pt2_ind, verbose=verbose)
    logt('pt2_dense ', pt2_dense, verbose=verbose)

    ##-----------------------------------------------------------------------------
    ## Build mesh-grid to hold pixel coordinates
    ##-----------------------------------------------------------------------------
    # X = tf.range(grid_w, dtype=tf.int32)
    # Y = tf.range(grid_h, dtype=tf.int32)
    # X, Y = tf.meshgrid(X, Y)

    # duplicate (repeat) X and Y into a  batch_size x rois_per_image tensor
    # print('    X/Y shapes :',  X.get_shape(), Y.get_shape())
    # ones = tf.ones([tf.shape(pt2_dense)[0] , 1, 1], dtype = tf.int32)
    # rep_X = ones * X
    # rep_Y = ones * Y
    # print('    Ones:       ', ones.shape)
    # print('    ones_exp * X', ones.shape, '*', X.shape, '= ',rep_X.shape)
    # print('    ones_exp * Y', ones.shape, '*', Y.shape, '= ',rep_Y.shape)

    # # stack the X and Y grids
    # pos_grid = tf.to_float(tf.stack([rep_X,rep_Y], axis = -1))
    # print('    pos_grid before transpose : ', pos_grid.get_shape())
    # pos_grid = tf.transpose(pos_grid,[1,2,0,3])
    # print('    pos_grid after  transpose : ', pos_grid.get_shape())

    ##-----------------------------------------------------------------------------
    ##  Build mean and covariance tensors for Multivariate Normal Distribution
    ##-----------------------------------------------------------------------------
    pt2_dense_scaled = pt2_dense[:, :4] / heatmap_scale
    width = pt2_dense_scaled[:, 3] - pt2_dense_scaled[:, 1]  # x2 - x1
    height = pt2_dense_scaled[:, 2] - pt2_dense_scaled[:, 0]
    cx = pt2_dense_scaled[:, 1] + (width / 2.0)
    cy = pt2_dense_scaled[:, 0] + (height / 2.0)
    means = tf.stack((cx, cy), axis=-1)
    covar = tf.stack((width * 0.5, height * 0.5), axis=-1)
    covar = tf.sqrt(covar)
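    # Worked example (hypothetical numbers, not from the original code): a scaled box
    # (y1, x1, y2, x2) = (2., 2., 6., 10.) gives width = 8, height = 4, cx = 6, cy = 4,
    # and covar = sqrt((4., 2.)) ~ (2.0, 1.41).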

    ##-----------------------------------------------------------------------------
    ##  Compute Normal Distribution for bounding boxes
    ##-----------------------------------------------------------------------------
    prob_grid = tf.ones([tf.shape(pt2_dense)[0], grid_h, grid_w],
                        dtype=tf.float32)
    logt('Prob_grid  ', prob_grid, verbose=verbose)

    # tfd = tf.contrib.distributions
    # mvn = tfd.MultivariateNormalDiag(loc = means,  scale_diag = covar)
    # prob_grid = mvn.prob(pos_grid)
    # print('    >> input to MVN.PROB: pos_grid (meshgrid) shape: ', pos_grid.shape)
    # print('     box_dims: ', box_dims.shape)
    # print('     Prob_grid shape from mvn.probe: ', prob_grid.shape)
    # prob_grid = tf.transpose(prob_grid,[2,0,1])
    # print('     Prob_grid shape after tanspose: ', prob_grid.shape)
    # print('    << output probabilities shape  : ', prob_grid.shape)

    #--------------------------------------------------------------------------------
    # Kill distributions of NaN boxes (resulting from bboxes with height/width of zero,
    # which cause singular sigma covariance matrices)
    #--------------------------------------------------------------------------------
    # prob_grid = tf.where(tf.is_nan(prob_grid),  tf.zeros_like(prob_grid), prob_grid)

    #---------------------------------------------------------------------------------------------
    # (1) apply normalization per bbox heatmap instance
    #---------------------------------------------------------------------------------------------
    # print('\n    normalization ------------------------------------------------------')
    # normalizer = tf.reduce_max(prob_grid, axis=[-2,-1], keepdims = True)
    # normalizer = tf.where(normalizer < 1.0e-15,  tf.ones_like(normalizer), normalizer)
    # print('    normalizer     : ', normalizer.shape)
    # prob_grid_norm = prob_grid / normalizer

    #---------------------------------------------------------------------------------------------
    # (2) multiply normalized heatmap by normalized score in in_tensor (pt2_dense column 7)
    #     broadcasting : https://stackoverflow.com/questions/49705831/automatic-broadcasting-in-tensorflow
    #---------------------------------------------------------------------------------------------
    # prob_grid_norm_scaled = tf.transpose(tf.transpose(prob_grid_norm) * pt2_dense[:,7])
    # print('    prob_grid_norm_scaled : ', prob_grid_norm_scaled.shape)

    ##---------------------------------------------------------------------------------------------
    ## (NEW STEP) Clip heatmap to region surrounding Cy,Cx and Covar X, Y
    ##---------------------------------------------------------------------------------------------
    prob_grid_clipped = tf.map_fn(clip_heatmap, [prob_grid, cy, cx, covar],
                                  dtype=tf.float32,
                                  swap_memory=True)
    logt('prob_grid_clipped ', prob_grid_clipped, verbose=verbose)

    ##--------------------------------------------------------------------------------------------
    ## (0) Generate scores using prob_grid and pt2_dense - (NEW METHOD added 09-21-2018)
    ##  pt2_dense[:,7] is the per-class-normalized score from in_tensor
    ##
    ## 11-27-2018: (note - here, build_hm_score_v2 is being applied to prob_grid_clipped,
    ## unlike chm_layer) - Changed to prob_grid to make it consistent with chm_layer.py
    ##
    ## When using prob_grid:
    ## [ 1.0000     1.0000   138.0000     1.0000  4615.0000  4531.1250  4615.0000
    ## [ 3.0000     1.0000   179.0000     1.0000   570.0000   547.5000   570.0000
    ##
    ## When using prob_grid_clipped:
    ## [ 1.0000     1.0000   138.0000     1.0000   144.0000  4531.1250   144.0000
    ## [ 3.0000     1.0000   179.0000     1.0000    56.0000   547.5000    56.0000
    ##--------------------------------------------------------------------------------------------
    old_style_scores = tf.map_fn(
        build_hm_score_v2, [prob_grid, pt2_dense_scaled, pt2_dense[:, 7]],
        dtype=tf.float32,
        swap_memory=True)
    old_style_scores = tf.scatter_nd(
        pt2_ind,
        old_style_scores, [batch_size, num_classes, rois_per_image, 3],
        name='scores_scattered')
    logt('old_style_scores ', old_style_scores, verbose=verbose)

    ##---------------------------------------------------------------------------------------------
    ## - Build alternative scores based on normalized/scaled/clipped heatmap
    ##---------------------------------------------------------------------------------------------
    alt_scores_1 = tf.map_fn(build_hm_score_v3,
                             [prob_grid_clipped, cy, cx, covar],
                             dtype=tf.float32)
    logt('alt_scores_1    ', alt_scores_1, verbose=verbose)
    alt_scores_1 = tf.scatter_nd(pt2_ind,
                                 alt_scores_1, [
                                     batch_size, num_classes, rois_per_image,
                                     KB.int_shape(alt_scores_1)[-1]
                                 ],
                                 name='alt_scores_1')

    alt_scores_1_norm = normalize_scores(alt_scores_1)
    logt('alt_scores_1(by class)      ', alt_scores_1, verbose=verbose)
    logt('alt_scores_1_norm(by_class) ', alt_scores_1_norm, verbose=verbose)

    ##-------------------------------------------------------------------------------------
    ## (3) scatter out the probability distribution heatmaps based on class
    ##-------------------------------------------------------------------------------------
    gauss_heatmap = tf.scatter_nd(
        pt2_ind,
        prob_grid_clipped,
        [batch_size, num_classes, rois_per_image, grid_h, grid_w],
        name='gauss_heatmap')
    logt(
        '\n    Scatter out the probability distributions based on class --------------',
        verbose=verbose)
    logt('pt2_ind       ', pt2_ind, verbose=verbose)
    logt('prob_grid     ', prob_grid, verbose=verbose)
    logt('gauss_heatmap ', gauss_heatmap,
         verbose=verbose)  # batch_sz , num_classes, num_rois, image_h, image_w

    ##-------------------------------------------------------------------------------------
    ## (4) MAX : Reduce_MAX gauss_heatmaps by class
    ##           Since all values are set to '1' in the 'heatmap', there is no need to
    ##           sum or normalize. We reduce_max over the per-class ROI axis (axis 2),
    ##           and as a result the corresponding areas in each class heatmap are set to '1'
    ##-------------------------------------------------------------------------------------
    gauss_heatmap = tf.reduce_max(gauss_heatmap, axis=2, name='gauss_heatmap')
    logt(
        '\n    Reduce MAX based on class -------------------------------------',
        verbose=verbose)
    logt(' gaussian_heatmap : ', gauss_heatmap, verbose=verbose)

    #---------------------------------------------------------------------------------------------
    # (5) heatmap normalization
    #     normalizer is set to one when the max of a class is zero;
    #     this prevents elements of gauss_heatmap_norm from evaluating to NaN
    #---------------------------------------------------------------------------------------------
    # print('\n    normalization ------------------------------------------------------')
    # normalizer = tf.reduce_max(gauss_heatmap, axis=[-2,-1], keepdims = True)
    # normalizer = tf.where(normalizer < 1.0e-15,  tf.ones_like(normalizer), normalizer)
    # gauss_heatmap_norm = gauss_heatmap / normalizer
    # print('    normalizer shape : ', normalizer.shape)
    # print('    gauss norm       : ', gauss_heatmap_norm.shape   ,' Keras tensor ', KB.is_keras_tensor(gauss_heatmap_norm) )

    ##---------------------------------------------------------------------------------------------
    ##  build indices and extract heatmaps corresponding to each bounding box's class id
    ##  build alternative scores based on the normalized/scaled/clipped heatmap
    ##---------------------------------------------------------------------------------------------
    hm_indices = tf.cast(pt2_ind[:, :2], dtype=tf.int32)
    pt2_heatmaps = tf.gather_nd(gauss_heatmap, hm_indices)
    logt('hm_indices   ', hm_indices, verbose=verbose)
    logt('pt2_heatmaps ', pt2_heatmaps, verbose=verbose)

    alt_scores_2 = tf.map_fn(build_hm_score_v3, [pt2_heatmaps, cy, cx, covar],
                             dtype=tf.float32)
    logt('alt_scores_2  ', alt_scores_2, verbose=verbose)

    alt_scores_2 = tf.scatter_nd(pt2_ind,
                                 alt_scores_2, [
                                     batch_size, num_classes, rois_per_image,
                                     KB.int_shape(alt_scores_2)[-1]
                                 ],
                                 name='alt_scores_2')

    alt_scores_2_norm = normalize_scores(alt_scores_2)
    logt('alt_scores_2(by class)       : ', alt_scores_2, verbose=verbose)
    logt('alt_scores_2_norm(by_class)  : ', alt_scores_2_norm, verbose=verbose)

    ##--------------------------------------------------------------------------------------------
    ##  Transpose tensor to [BatchSz, Height, Width, Num_Classes]
    ##--------------------------------------------------------------------------------------------
    gauss_heatmap = tf.transpose(gauss_heatmap, [0, 2, 3, 1], name=names[0])

    # gauss_heatmap_norm = tf.transpose(gauss_heatmap_norm,[0,2,3,1], name = names[0]+'_norm')
    # print('    gauss_heatmap_norm : ', gauss_heatmap_norm.shape,' Keras tensor ', KB.is_keras_tensor(gauss_heatmap_norm) )
    # print('    complete')

    ##--------------------------------------------------------------------------------------------
    ## APPEND ALL SCORES TO input score tensor TO YIELD output scores tensor
    ##--------------------------------------------------------------------------------------------
    gauss_scores = tf.concat([
        in_tensor, old_style_scores, alt_scores_1, alt_scores_1_norm,
        alt_scores_2, alt_scores_2_norm
    ],
                             axis=-1,
                             name=names[0] + '_scores')
    #                                 alt_scores_2[...,:3], alt_scores_3],
    logt('gauss_heatmap  ', gauss_heatmap, verbose=verbose)
    logt('gauss_scores', gauss_scores, verbose=verbose)
    logt('complete    ', verbose=verbose)

    return gauss_heatmap, gauss_scores
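
# The gather/scatter pattern used above recurs throughout these layers: non-zero
# rows are pulled out of a sparse [batch, class, roi, ...] tensor with
# tf.where + tf.gather_nd, processed densely, and written back with tf.scatter_nd.
# A minimal, self-contained sketch of that round trip (toy shapes, TF 1.x style
# as in the rest of this file; scatter_gather_demo is illustrative only and not
# part of the original code):
def scatter_gather_demo():
    import numpy as np
    import tensorflow as tf
    data = np.zeros((1, 3, 4, 2), dtype=np.float32)   # [batch, class, roi, cols]
    data[0, 1, 0] = [1.0, 0.9]                        # one detection of class 1
    data[0, 2, 0] = [2.0, 0.7]                        # one detection of class 2
    t = tf.constant(data)
    ind = tf.where(tf.reduce_sum(tf.abs(t), axis=-1) > 0)   # [?, 3] indices
    dense = tf.gather_nd(t, ind)                             # [?, 2] dense rows
    back = tf.scatter_nd(ind, dense, tf.shape(t, out_type=tf.int64))
    with tf.Session() as sess:
        original, round_trip = sess.run([t, back])
    assert np.allclose(original, round_trip)                 # exact round trip
    return round_trip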
Example #11
    def call(self, inputs):
        fcn_heatmap, pr_hm_scores = inputs
        # 'verbose' is referenced below but never defined in this scope; take it
        # from the layer config, as the other layers in this file do.
        verbose = self.config.VERBOSE

        logt('> FCNScoreLayer Call() ', len(inputs), verbose=verbose)
        logt('  fcn_heatmap.shape    ', fcn_heatmap, verbose=verbose)
        logt('  pr_hm_scores.shape   ', pr_hm_scores, verbose=verbose)

        fcn_scores = fcn_scoring_graph([fcn_heatmap, pr_hm_scores],
                                       self.config)

        logt('\n   Output build_fcn_score ', verbose=verbose)
        logt('     fcn_scores   ', fcn_scores, verbose=verbose)
        logt('     complete', verbose=verbose)

        return [fcn_scores]
def fpn_classifier_graph(rois,
                         feature_maps,
                         image_shape,
                         pool_size,
                         num_classes,
                         verbose=0):
    '''
    Builds the computation graph of the feature pyramid network classifier
    and regressor heads.
    
    Inputs:
    -------
    rois:               [batch, num_rois, 4 ] 
                        Proposal boxes in normalized coordinates (y1, x1, y2, x2)
                        
    feature_maps:       List of feature maps from different layers of the pyramid,
                        [P2, P3, P4, P5]. Each has a different resolution.
    image_shape:        [height, width, depth]
    
    pool_size:          The width of the square feature map generated from ROI Pooling.
    
    num_classes:        number of classes, which determines the depth of the results

    Returns:
    --------
    logits:             [N, NUM_CLASSES] classifier logits (before softmax)
    probs:              [N, NUM_CLASSES] classifier probabilities
    bbox_deltas:        [N, NUM_CLASSES, (dy, dx, log(dh), log(dw))] 
                        Deltas to apply to proposal boxes
                        
    '''
    print('\n>>> FPN Classifier Graph verbose:', verbose)
    if verbose:

        logt('    INPUT: rois shape ', rois)
        logt('    INPUT: mrcnn feature_maps ', len(feature_maps))
        logt('    -      feature_map P2 ', feature_maps[0])
        logt('    -      feature_map P3 ', feature_maps[1])
        logt('    -      feature_map P4 ', feature_maps[2])
        logt('    -      feature_map P5 ', feature_maps[3])
        logt('    INPUT: image_shape', image_shape)
        logt('    INPUT: pool_size  ', pool_size)
        logt('    INPUT: num_classes', num_classes)

    # ROI Pooling
    # Shape: [batch, num_boxes, pool_height, pool_width, channels]

    x = PyramidROIAlign([pool_size, pool_size],
                        image_shape,
                        name="roi_align_classifier")([rois] + feature_maps)
    logt('roi_align_classifier ', x, verbose=verbose)

    # Two 1024 FC layers (implemented with Conv2D for consistency)
    #-------------------------------------------------------------------------------------------
    # TimeDistributed :
    #
    #   Applies the Conv2D layer to each slice of the batch input. The input should be at least 3D,
    #   and the dimension of index one will be considered to be the temporal dimension.
    #
    # Example:
    #   Consider a batch of 32 samples, where each sample is a sequence of 10 vectors of 16 dimensions.
    #   The batch input shape of the layer is then (32, 10, 16). The input_shape, not including the
    #   samples dimension, is (10, 16).
    #   You can then use TimeDistributed to apply a Dense layer to each of the 10 timesteps, independently:
    #   ## as the first layer in a model
    #   model = Sequential()
    #   model.add(TimeDistributed(Dense(8), input_shape=(10, 16)))
    #   ## now model.output_shape == (None, 10, 8)
    #
    #   In subsequent layers, there is no need for the input_shape:
    #
    #   model.add(TimeDistributed(Dense(32)))
    #   # now model.output_shape == (None, 10, 32)
    #
    #   The output will then have shape (32, 10, 32).
    #-------------------------------------------------------------------------------------------

    x = KL.TimeDistributed(KL.Conv2D(1024, (pool_size, pool_size),
                                     padding="valid"),
                           name="mrcnn_class_conv1")(x)
    logt('mrcnn_class_conv1', x, verbose=verbose)

    x = KL.TimeDistributed(BatchNorm(axis=3), name='mrcnn_class_bn1')(x)
    logt('mrcnn_class_bn1  ', x, verbose=verbose)

    x = KL.Activation('relu')(x)
    logt('mrcnn_class_relu1', x, verbose=verbose)
    logt(verbose=verbose)
    # x = KL.Dropout(0.5)(x)
    x = KL.TimeDistributed(KL.Conv2D(1024, (1, 1)),
                           name="mrcnn_class_conv2")(x)
    logt('mrcnn_class_conv2 ', x, verbose=verbose)

    x = KL.TimeDistributed(BatchNorm(axis=3), name='mrcnn_class_bn2')(x)
    logt('mrcnn_class_bn2   ', x, verbose=verbose)

    x = KL.Activation('relu')(x)
    logt('mrcnn_class_relu2 ', x, verbose=verbose)
    logt(verbose=verbose)

    shared = KL.Lambda(lambda x: KB.squeeze(KB.squeeze(x, 3), 2),
                       name="pool_squeeze")(x)
    logt('pool_squeeze(Shared)', shared, verbose=verbose)

    ## Classifier head
    # x = KL.TimeDistributed(KL.Dense(num_classes, name = 'mrcnn_class_logits'))(shared)
    mrcnn_class_logits = KL.TimeDistributed(KL.Dense(num_classes),
                                            name='mrcnn_class_logits')(shared)
    logt('mrcnn_class_logits ', mrcnn_class_logits, verbose=verbose)

    mrcnn_class_logits = KL.Lambda(
        lambda x: KB.identity(x, name='mrcnn_class_logits'),
        name='mrcnn_logits_lambda')(mrcnn_class_logits)
    logt('mrcnn_class_logits (final)', mrcnn_class_logits, verbose=verbose)

    # x = KL.TimeDistributed(KL.Activation("softmax"))(mrcnn_class_logits)
    mrcnn_probs = KL.TimeDistributed(
        KL.Activation("softmax"), name='mrcnn_class_act')(mrcnn_class_logits)
    logt('mrcnn_probs  ', mrcnn_probs, verbose=verbose)

    mrcnn_probs = KL.Lambda(lambda x: KB.identity(x, name='mrcnn_class'),
                            name='mrcnn_class_lambda')(mrcnn_probs)
    logt('mrcnn_probs (final) ', mrcnn_probs, verbose=verbose)

    ## BBox head
    # [batch, boxes, num_classes * (dy, dx, log(dh), log(dw))]
    x = KL.TimeDistributed(KL.Dense(num_classes * 4, activation='linear'),
                           name='mrcnn_bbox_fc')(shared)
    logt('mrcnn_bbox_fc ', x, verbose=verbose)

    # Reshape to [batch, boxes, num_classes, (dy, dx, log(dh), log(dw))]
    s = KB.int_shape(x)
    mrcnn_bbox = KL.Reshape((s[1], num_classes, 4), name='mrcnn_bbox_rs')(x)
    logt('mrcnn_bbox_fc reshaped output', mrcnn_bbox, verbose=verbose)
    # mrcnn_bbox = KB.identity(mrcnn_bbox, name = "mrcnn_bbox")
    mrcnn_bbox = KL.Lambda(lambda x: KB.identity(x, name='mrcnn_bbox'),
                           name='mrcnn_bbox_lambda')(mrcnn_bbox)

    logt('mrcnn_bbox (final)', mrcnn_bbox, verbose=verbose)

    return mrcnn_class_logits, mrcnn_probs, mrcnn_bbox
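
# The TimeDistributed shape behaviour described in the comment block inside
# fpn_classifier_graph can be verified with a tiny standalone model; a minimal
# sketch (assumes keras is importable as in this codebase; the demo function
# name is illustrative only):
def time_distributed_shape_demo():
    from keras.models import Sequential
    from keras.layers import TimeDistributed, Dense
    model = Sequential()
    # 10 timesteps of 16-dim vectors; Dense(8) is applied to each step independently
    model.add(TimeDistributed(Dense(8), input_shape=(10, 16)))
    assert model.output_shape == (None, 10, 8)
    model.add(TimeDistributed(Dense(32)))
    assert model.output_shape == (None, 10, 32)
    return model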
def fpn_graph(Resnet_Layers, verbose=0):
    """
    #----------------------------------------------------------------------------
    # Build the Feature Pyramid Network (FPN) layers.
    # Top-down Layers
    # Returns a list of the last layers of each stage, 5 in total.
    # Don't create the thead (stage 5), so we pick the 4th item in the list.
    #----------------------------------------------------------------------------        
    # Top-down Layers
    # TODO: add assert to varify feature map sizes match what's in config
    """
    print('\n>>> Feature Pyramid Network (FPN) Graph ')

    _, C2, C3, C4, C5 = Resnet_Layers

    logt('Input FPN C5 ', C5, verbose=verbose)
    logt('Input FPN C4 ', C4, verbose=verbose)
    logt('Input FPN C3 ', C3, verbose=verbose)
    logt('Input FPN C2 ', C2, verbose=verbose)

    P5 = KL.Conv2D(256, (1, 1), name='fpn_c5p5')(C5)
    logt('FPN P5 ', P5, verbose=verbose)

    x = KL.UpSampling2D(size=(2, 2))(P5)
    y = KL.Conv2D(256, (1, 1))(C4)
    logt('   Upsampled P5 (x)', x, verbose=verbose)
    logt('   Conv2D    C4 (y)', y, verbose=verbose)
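    # NOTE: x and y above are built only for logging; the named layers inside the
    # KL.Add below recompute the same upsample / 1x1-conv pair for the actual
    # graph (the same pattern repeats for P3 and P2).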

    P4 = KL.Add(name="fpn_p4add")([
        KL.UpSampling2D(size=(2, 2), name="fpn_p5upsampled")(P5),
        KL.Conv2D(256, (1, 1), name='fpn_c4p4')(C4)
    ])
    logt('FPN P4 (x+y)', P4, verbose=verbose)

    x = KL.UpSampling2D(size=(2, 2))(P4)
    y = KL.Conv2D(256, (1, 1))(C3)
    logt('   Upsampled P4 (x)', x, verbose=verbose)
    logt('   Conv2D    C3 (y)', y, verbose=verbose)

    P3 = KL.Add(name="fpn_p3add")([
        KL.UpSampling2D(size=(2, 2), name="fpn_p4upsampled")(P4),
        KL.Conv2D(256, (1, 1), name='fpn_c3p3')(C3)
    ])
    logt('FPN P3 (x+y)', P3, verbose=verbose)

    x = KL.UpSampling2D(size=(2, 2))(P3)
    y = KL.Conv2D(256, (1, 1))(C2)
    logt('   Upsampled P3 (x)', x, verbose=verbose)
    logt('   Conv2D    C2 (y)', y, verbose=verbose)

    P2 = KL.Add(name="fpn_p2add")([
        KL.UpSampling2D(size=(2, 2), name="fpn_p3upsampled")(P3),
        KL.Conv2D(256, (1, 1), name='fpn_c2p2')(C2)
    ])
    logt('FPN P2 (x+y)', P2, verbose=verbose)

    # Attach 3x3 conv to all P layers to get the final feature maps.
    P2 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p2")(P2)
    P3 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p3")(P3)
    P4 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p4")(P4)
    P5 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p5")(P5)

    # P6 is used for the 5th anchor scale in RPN. Generated by
    # subsampling from P5 with stride of 2.
    P6 = KL.MaxPooling2D(pool_size=(1, 1), strides=2, name="fpn_p6")(P5)
    if verbose:
        print()
        print('    FPN Final output')
        logt('     FPN P6 (Maxpool2D of P5 w/ stride 2)', P6)
        logt('     FPN P5 (Conv2D (3,3) of P5)', P5)
        logt('     FPN P4 (Conv2D (3,3) of P4)', P4)
        logt('     FPN P3 (Conv2D (3,3) of P3)', P3)
        logt('     FPN P2 (Conv2D (3,3) of P2)', P2)

    return [P2, P3, P4, P5, P6]
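
# A quick sanity sketch of the pyramid resolutions fpn_graph produces for a
# square input, assuming the standard ResNet feature strides of 4/8/16/32 for
# C2-C5 and the extra stride-2 subsampling that creates P6 (fpn_resolutions is
# illustrative only, not part of the original code):
def fpn_resolutions(image_size=1024):
    strides = {'P2': 4, 'P3': 8, 'P4': 16, 'P5': 32, 'P6': 64}
    return {level: image_size // stride for level, stride in strides.items()}

# fpn_resolutions(1024) -> {'P2': 256, 'P3': 128, 'P4': 64, 'P5': 32, 'P6': 16}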
def resnet_graph(input_image, architecture, stage5=False, verbose = 0):

    assert architecture in ["resnet50", "resnet101"]
    print()
    print('----------------------------')
    print('>>> Resnet Graph ')
    print('----------------------------')
    print('     Input_image shape :', input_image.shape)
    
    # Stage 1 : Convolutional Layer 1
    #   zero pad image 3 x 3 
    #   apply 2D convolution of 64 filters with kernel size of 7 x 7, stride 2 x 2
    #   apply batch normalization to output
    #   apply ReLU activation 
    #   apply max pooling (3,3) stride (2,2)
    x = KL.ZeroPadding2D((3, 3))(input_image)
    logt('After ZeroPadding2D  ', x, verbose = verbose)
    
    x = KL.Conv2D(64, (7, 7), strides=(2, 2), name='conv1', use_bias=True)(x)
    logt('After Conv2D padding :', x, verbose = verbose)
    
    x = BatchNorm(axis=3, name='bn_conv1')(x)
    logt('After BatchNorm', x, verbose = verbose)
    
    x = KL.Activation('relu')(x)
    
    C1 = x = KL.MaxPooling2D((3, 3), strides=(2, 2), padding="same")(x)
    logt('C1 ', C1, verbose = verbose)
    
    # Stage 2
    #   conv block , kernel size: 3, filters: [64, 64, 256]
    x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
    
    C2 = x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')
    logt('C2  ', C2, verbose = verbose)
    
    # Stage 3
    x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
    C3 = x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')
    logt('C3  ', C3, verbose = verbose)
    
    # Stage 4
    x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
    block_count = {"resnet50": 5, "resnet101": 22}[architecture]
    for i in range(block_count):
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block=chr(98 + i))
    C4 = x
    logt('C4 ', C4, verbose = verbose)
    
    # Stage 5
    if stage5:
        x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
        x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
        C5 = x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')
    else:
        C5 = None
    logt('C5 ', C5, verbose = verbose)
    
    return [C1, C2, C3, C4, C5]
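
# The stage-4 identity blocks above are named with consecutive letters via
# chr(98 + i), continuing from block 'a' created by conv_block:
#   [chr(98 + i) for i in range(5)]   -> ['b', 'c', 'd', 'e', 'f']    (resnet50)
#   [chr(98 + i) for i in range(22)]  -> ['b', 'c', ..., 'w']         (resnet101)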
def fcn_heatmap_CE_loss_graph_2(target_heatmap, pred_heatmap, active_class_ids):
    '''
    Categorical Cross Entropy Loss for the FCN heatmaps.

    target_heatmap:         [batch, heatmap_h, heatmap_w, num_classes]
                            Ground truth class heatmaps.
    
    pred_heatmap:           [batch, heatmap_h, heatmap_w, num_classes]
                            Predicted class heatmap logits.
    
    active_class_ids:       [batch, num_classes]. Has a value of 1 for
                            classes that are in the dataset of the image, and 0
                            for classes that are not in the dataset. 
    '''
    print()
    print('--------------------------------' )
    print('>>> fcn_heatmap_CE_loss_graph_2 ' )
    print('--------------------------------' )
    logt('target_heatmap    ', target_heatmap)
    logt('pred_heatmap      ', pred_heatmap  )
    logt('active_class_ids  ', active_class_ids)
    # target_class_ids = tf.cast(target_class_ids, 'int64')
    
    # Find predictions of classes that are not in the dataset.
    pred_class_ids = KB.argmax(pred_heatmap  , axis=-1)
    gt_class_ids   = KB.argmax(target_heatmap, axis=-1)
    logt('pred_class_ids    ', pred_class_ids) 
    logt('gt_class_ids      ', gt_class_ids  ) 

    # TODO: Update this line to work with batch > 1. Right now it assumes all
    #       images in a batch have the same active_class_ids
    pred_active = tf.gather(active_class_ids[0], pred_class_ids)
    
    # Loss
    loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=target_heatmap, logits=pred_heatmap)
    logt('pred_active       ', pred_active)
    logt('loss              ', loss)    

    # Erase losses of predictions of classes that are not in the active
    # classes of the image.
    # loss = loss * pred_active
    # print('loss*pred_active ', loss)

    # Compute the loss total. The mean over only the predictions that contribute
    # to the loss (division by tf.reduce_sum(pred_active)) is currently disabled.
    loss = tf.reduce_sum(loss)   ##/ tf.reduce_sum(pred_active)
    loss_mean  = KB.mean(loss)
    loss_final = tf.reshape(loss_mean, [1, 1], name = "fcn_CE_loss")
    
    logt('loss      ', loss)
    logt('loss mean ', loss_mean)
    logt('loss final', loss_final)
    
    return loss_final
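
# A minimal runnable check of the cross-entropy reduction used above, on toy
# one-hot heatmaps (TF 1.x style, matching the rest of this file; ce_loss_demo
# and its shapes are illustrative only):
def ce_loss_demo():
    import numpy as np
    import tensorflow as tf
    target = tf.constant(np.eye(3, dtype=np.float32)[None, ...])      # [1, 3, 3] one-hot
    logits = tf.constant(np.random.randn(1, 3, 3).astype(np.float32))
    # per-position CE over the class axis, then the same sum / mean / reshape chain
    loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=target, logits=logits)
    loss_final = tf.reshape(tf.reduce_mean(tf.reduce_sum(loss)), [1, 1])
    with tf.Session() as sess:
        return sess.run(loss_final)                                   # shape (1, 1)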
def build_heatmap_inference(in_tensor, config, names = None):
    '''
    input:
    -------
        in_tensor:      [ Bsz, Num_Classes, 200, 9 : {y1,x1,y2,x2, class, score, det_type, sequence_id, normalized_score}]
                         
    output:
    -------    
        pr_heatmap      (None,  Heatmap-height, Heatmap_width, num_classes)
        pr_scores       (None, num_classes, 200, 24) 
                        [batchSz, Detection_Max_instance, (y1,x1,y2,x2, class, score, det_type, sequence_id, normalized_score,
                                                           scores-0: gaussian_sum, bbox_area, weighted_norm_sum 
                                                           scores-1: score, mask_sum, score/mask_sum, (score, mask_sum, score/mask_sum) normalized by class
                                                           scores-2: score, mask_sum, score/mask_sum, (score, mask_sum, score/mask_sum) normalized by class ]
    '''
    verbose           = config.VERBOSE
    num_detections    = config.DETECTION_MAX_INSTANCES
    img_h, img_w      = config.IMAGE_SHAPE[:2]
    batch_size        = config.BATCH_SIZE
    num_classes       = config.NUM_CLASSES 
    heatmap_scale     = config.HEATMAP_SCALE_FACTOR
    grid_h, grid_w    = config.IMAGE_SHAPE[:2] // heatmap_scale    
    # rois_per_image  = config.DETECTION_PER_CLASS
    rois_per_image    = (in_tensor.shape)[2]  
    CLASS_COLUMN      = 4
    SCORE_COLUMN      = 5
    DT_TYPE_COLUMN    = 6
    SEQUENCE_COLUMN   = 7
    NORM_SCORE_COLUMN = 8

    if verbose:
        print('\n ')
        print('  > build_heatmap_inference() for ', names )
        print('    in_tensor shape        : ', in_tensor.shape)       
        print('    num bboxes per class   : ', rois_per_image )
        print('    heatmap scale          : ', heatmap_scale, 'Dimensions:  w:', grid_w,' h:', grid_h)

    ##-----------------------------------------------------------------------------    
    ## Stack non_zero bboxes from in_tensor into pt2_dense 
    ##-----------------------------------------------------------------------------
    # pt2_ind shape is [?, 3].                    pt2_dense shape is [?, 9]
    #    pt2_ind[0] corresponds to image_index       pt2_dense[0:4]  roi coordinates (y1, x1, y2, x2)
    #    pt2_ind[1] corresponds to class_index       pt2_dense[4]    class id 
    #    pt2_ind[2] corresponds to roi row_index     pt2_dense[5]    score from mrcnn    
    #                                                pt2_dense[6]    detection type
    #                                                pt2_dense[7]    bbox sequence id    
    #                                                pt2_dense[8]    per-class normalized score 
    #-----------------------------------------------------------------------------
    pt2_sum = tf.reduce_sum(tf.abs(in_tensor[:,:,:,:4]), axis=-1)
    pt2_ind = tf.where(pt2_sum > 0)
    pt2_dense = tf.gather_nd( in_tensor, pt2_ind)

    logt('pt2_sum   ', pt2_sum, verbose = verbose)
    logt('pt2_ind   ', pt2_ind, verbose = verbose)
    logt('pt2_dense ', pt2_dense, verbose = verbose)

    ##-----------------------------------------------------------------------------
    ## Build mesh-grid to hold pixel coordinates  
    ##-----------------------------------------------------------------------------
    X = tf.range(grid_w, dtype=tf.int32)
    Y = tf.range(grid_h, dtype=tf.int32)
    X, Y = tf.meshgrid(X, Y)

    # duplicate (repeat) X and Y once for each dense bounding box
    ones = tf.ones([tf.shape(pt2_dense)[0] , 1, 1], dtype = tf.int32)
    rep_X = ones * X
    rep_Y = ones * Y 
    
    if verbose:
        print('    X/Y shapes :',  X.get_shape(), Y.get_shape())
        print('    Ones:    ', ones.shape)                
        print('    ones_exp * X', ones.shape, '*', X.shape, '= ',rep_X.shape)
        print('    ones_exp * Y', ones.shape, '*', Y.shape, '= ',rep_Y.shape)

    # stack the X and Y grids 
    pos_grid = tf.to_float(tf.stack([rep_X,rep_Y], axis = -1))
    logt('pos_grid before transpose ', pos_grid, verbose = verbose)
    pos_grid = tf.transpose(pos_grid,[1,2,0,3])
    logt('pos_grid after transpose ', pos_grid, verbose = verbose)  
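    # pos_grid is now [grid_h, grid_w, N, 2]: a single (x, y) coordinate grid
    # shared across the N dense boxes, in the layout mvn.prob() expects below.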

    ##-----------------------------------------------------------------------------
    ##  Build mean and covariance tensors for Multivariate Normal Distribution 
    ##-----------------------------------------------------------------------------
    bboxes_scaled = pt2_dense[:,:4]/heatmap_scale
    width  = bboxes_scaled[:,3] - bboxes_scaled[:,1]      # x2 - x1
    height = bboxes_scaled[:,2] - bboxes_scaled[:,0]
    cx     = bboxes_scaled[:,1] + ( width  / 2.0)
    cy     = bboxes_scaled[:,0] + ( height / 2.0)
    means  = tf.stack((cx,cy),axis = -1)
    covar  = tf.stack((width * 0.5 , height * 0.5), axis = -1)
    covar  = tf.sqrt(covar)

    ## Added 2019-05-12 to prevent NaN when bounding box is extremely small 
    ## resulting in width or height being equal to zero 
    covar  = tf.where(covar < 1.0e-15, tf.ones_like(covar), covar)
    
    ##-----------------------------------------------------------------------------
    ##  Compute Normal Distribution for bounding boxes
    ##-----------------------------------------------------------------------------    
    tfd = tf.contrib.distributions
    mvn = tfd.MultivariateNormalDiag(loc = means,  scale_diag = covar)
    prob_grid = mvn.prob(pos_grid)
    logt('Input to MVN.PROB: pos_grid (meshgrid) ', pos_grid, verbose = verbose)
    logt('Prob_grid shape from mvn.prob   ',prob_grid, verbose = verbose)
    prob_grid = tf.transpose(prob_grid,[2,0,1])
    logt('Prob_grid shape after transpose ', prob_grid, verbose = verbose)
    logt('Output probabilities shape   '  , prob_grid, verbose = verbose)
    
    ##--------------------------------------------------------------------------------------------
    ## (0) Generate scores using prob_grid and pt2_dense - (NEW METHOD added 09-21-2018)
    ##--------------------------------------------------------------------------------------------
    old_style_scores = tf.map_fn(build_hm_score_v2, [prob_grid, bboxes_scaled, pt2_dense[ :, NORM_SCORE_COLUMN ] ], 
                                 dtype = tf.float32, swap_memory = True)
    old_style_scores = tf.scatter_nd(pt2_ind, old_style_scores, 
                                     [batch_size, num_classes, rois_per_image, KB.int_shape(old_style_scores)[-1]],
                                     name = 'scores_scattered')
    logt('old_style_scores        :',  old_style_scores, verbose = verbose)

    ##----------------------------------------------------------------------------------------------------
    ## Generate scores using the same method as FCN, over the prob_grid.
    ## Using prob_grid_clipped as input is superfluous: it returns exactly the same results as prob_grid above
    ##----------------------------------------------------------------------------------------------------
    # alt_scores_0 = tf.map_fn(build_hm_score_v3, [prob_grid, cy, cx,covar], dtype=tf.float32)    
    # print('    alt_scores_0 : ', KB.int_shape(alt_scores_0), ' Keras tensor ', KB.is_keras_tensor(alt_scores_0) )
    # alt_scores_0 = tf.scatter_nd(pt2_ind, alt_scores_0, 
    #                                  [batch_size, num_classes, rois_per_image, KB.int_shape(alt_scores_0)[-1]], name = 'alt_scores_0')

    ##---------------------------------------------------------------------------------------------
    ## (NEW STEP - Clipped heatmaps) 
    ## (1)  Clip heatmap to region surrounding Cy,Cx and Covar X, Y 
    ##      Similar to what is being done for gt_heatmap in CHMLayerTarget 
    ##---------------------------------------------------------------------------------------------    
    prob_grid_clipped = tf.map_fn(clip_heatmap, [prob_grid, cy,cx, covar], dtype = tf.float32, swap_memory = True)  
    logt('    prob_grid_clipped : ', prob_grid_clipped, verbose = verbose)


    ##---------------------------------------------------------------------------------------------
    ## (2) apply normalization per bbox heatmap instance --> move to [0,1] range
    ##---------------------------------------------------------------------------------------------
    logt('\n    normalization ------------------------------------------------------', verbose = verbose)   
    normalizer = tf.reduce_max(prob_grid_clipped, axis=[-2,-1], keepdims = True)
    normalizer = tf.where(normalizer < 1.0e-15,  tf.ones_like(normalizer), normalizer)
    logt('    normalizer     : ', normalizer, verbose = verbose)
    prob_grid_cns = prob_grid_clipped / normalizer
    logt('    prob_grid_cns: clipped/normed/scaled : ', prob_grid_cns, verbose = verbose)
    
    
    ## replace above lines with lines below
    ## x_max = tf.reduce_max(prob_grid_clipped, axis=[-2,-1], keepdims = True)
    ## x_min = tf.reduce_min(prob_grid_clipped, axis=[-2,-1], keepdims = True)
    ##logt('    Reduce Max Shape: ', x_max, verbose = verbose)
    ##logt('    Reduce Min Shape: ', x_min, verbose = verbose)
    ## prob_grid_cns = (prob_grid_clipped - x_min) / (x_max - x_min)   
    ## logt('    prob_grid_cns: clipped/normed/scaled : ', prob_grid_cns, verbose = verbose)
   
    ##---------------------------------------------------------------------------------------------
    ## (3) multiply normalized heatmap by normalized score in in_tensor (pt2_dense NORM_SCORE_COLUMN)
    ##     broadcasting : https://stackoverflow.com/questions/49705831/automatic-broadcasting-in-tensorflow
    ##---------------------------------------------------------------------------------------------    
    prob_grid_cns = tf.transpose(tf.transpose(prob_grid_cns) * pt2_dense[ :, NORM_SCORE_COLUMN ])
    logt('    prob_grid_cns: clipped/normed/scaled : ', prob_grid_cns, verbose = verbose)
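    # The double transpose above is a broadcasting trick: transposing [N, h, w]
    # to [w, h, N] lets the per-box scores (shape [N]) broadcast along the last
    # axis; transposing back restores [N, h, w].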


    ##---------------------------------------------------------------------------------------------
    ## - Build alternative scores based on normalized/scaled/clipped heatmap
    ##---------------------------------------------------------------------------------------------
    alt_scores_1 = tf.map_fn(build_hm_score_v3, [prob_grid_cns, cy, cx,covar], dtype=tf.float32)    
    logt('alt_scores_1 ', alt_scores_1, verbose = verbose)
    alt_scores_1 = tf.scatter_nd(pt2_ind, alt_scores_1, 
                                     [batch_size, num_classes, rois_per_image, KB.int_shape(alt_scores_1)[-1]], name = 'alt_scores_1')  

    logt('alt_scores_1(by class) ', alt_scores_1, verbose = verbose)
    alt_scores_1_norm = normalize_scores(alt_scores_1)
    logt('alt_scores_1_norm(by_class) ', alt_scores_1_norm, verbose = verbose)
    
    # alt_scores_1_norm = tf.gather_nd(alt_scores_1_norm, pt2_ind)
    # print('    alt_scores_1_norm(by_image)  : ', alt_scores_1_norm.shape, KB.int_shape(alt_scores_1_norm))

    ##-------------------------------------------------------------------------------------
    ## (4) scatter out the probability distributions based on class 
    ##-------------------------------------------------------------------------------------
    gauss_heatmap   = tf.scatter_nd(pt2_ind, prob_grid_cns, 
                                    [batch_size, num_classes, rois_per_image, grid_h, grid_w], name = 'gauss_scatter')
    logt('\n    Scatter out the probability distributions based on class --------------', verbose = verbose)
    logt('pt2_ind shape      ', pt2_ind      , verbose = verbose)
    logt('prob_grid_cns      ', prob_grid_cns, verbose = verbose)
    logt('gauss_heatmap      ', gauss_heatmap, verbose = verbose)   # batch_sz , num_classes, num_rois, image_h, image_w
    
    
    ##-------------------------------------------------------------------------------------
    ## Construction of Gaussian Heatmap output using Reduce SUM
    ##
    ## (5) SUM : Reduce and sum up gauss_heatmaps by class  
    ## (6) heatmap normalization (per class)
    ## (7) Transpose heatmap to shape required for FCN
    ##-------------------------------------------------------------------------------------
    gauss_heatmap_sum = tf.reduce_sum(gauss_heatmap, axis=2, name='gauss_heatmap_sum')
    logt('\n    Reduce SUM based on class and normalize within each class -----------------------', verbose = verbose)
    logt('gaussian_heatmap_sum ', gauss_heatmap_sum , verbose = verbose)


    ## normalize in class
    normalizer = tf.reduce_max(gauss_heatmap_sum, axis=[-2,-1], keepdims = True)
    normalizer = tf.where(normalizer < 1.0e-15,  tf.ones_like(normalizer), normalizer)
    gauss_heatmap_sum = gauss_heatmap_sum / normalizer
    logt('normalizer shape   : ', normalizer, verbose = verbose)
    logt('normalized heatmap : ', gauss_heatmap_sum, verbose = verbose)

    ## replaced the above with the following two lines:  5-30-19
    ## gauss_heatmap_sum = tf.transpose(gauss_heatmap_sum, [0,2,3,1])
    ## gauss_heatmap_sum = normalize_heatmaps(gauss_heatmap_sum)   
    ## logt('normalized heatmap : ', gauss_heatmap_sum, verbose = verbose)
    
    ##---------------------------------------------------------------------------------------------
    ##  Score on reduced sum heatmaps. 
    ##
    ##  build indices and extract heatmaps corresponding to each bounding box's class id
    ##  build alternative scores based on the normalized/scaled/clipped heatmap
    ##---------------------------------------------------------------------------------------------
    hm_indices = tf.cast(pt2_ind[:, :2],dtype=tf.int32)
    logt('hm_indices   ',  hm_indices, verbose = verbose)
       
    pt2_heatmaps = tf.gather_nd(gauss_heatmap_sum, hm_indices )
    
    ## added 5-30-2019 to replace the above line 
    ## pt2_heatmaps = tf.transpose(gauss_heatmap_sum, [0,3,1,2])
    ## pt2_heatmaps = tf.gather_nd(pt2_heatmaps, hm_indices )

    logt('pt2_heatmaps ',  pt2_heatmaps, verbose = verbose)

    alt_scores_2 = tf.map_fn(build_hm_score_v3, [pt2_heatmaps, cy, cx,covar], dtype=tf.float32)    
    logt('    alt_scores_2    : ', alt_scores_2, verbose = verbose)
    
    alt_scores_2 = tf.scatter_nd(pt2_ind, alt_scores_2, 
                                [batch_size, num_classes, rois_per_image, KB.int_shape(alt_scores_2)[-1]], name = 'alt_scores_2')  

    logt('alt_scores_2(scattered)     ', alt_scores_2, verbose = verbose)
    alt_scores_2_norm = normalize_scores(alt_scores_2)
    logt('alt_scores_2_norm(by_class) ', alt_scores_2_norm, verbose = verbose)

    ##---------------------------------------------------------------------------------------------
    ## (7) Transpose heatmaps to shape required for FCN [batchsize , width, height, num_classes]
    ##---------------------------------------------------------------------------------------------
    gauss_heatmap_sum = tf.transpose(gauss_heatmap_sum           ,[0,2,3,1], name = names[0])
    logt(' gauss_heatmap_sum (final) ', gauss_heatmap_sum, verbose = verbose)
    # gauss_heatmap_sum_normalized = tf.transpose(gauss_heatmap_sum_normalized,[0,2,3,1], name = names[0]+'_norm')   
    # print('    reshaped heatmap normalized    : ', gauss_heatmap_sum_normalized.shape,' Keras tensor ', KB.is_keras_tensor(gauss_heatmap_sum_normalized) )

    # gauss_heatmap_max            = tf.transpose(gauss_heatmap_max           ,[0,2,3,1], name = names[0]+'_max')
    # print('    reshaped heatmap_max           : ', gauss_heatmap_max.shape,' Keras tensor ', KB.is_keras_tensor(gauss_heatmap_max) )
    # gauss_heatmap_max_normalized = tf.transpose(gauss_heatmap_max_normalized,[0,2,3,1], name = names[0]+'_max_norm') 
    # print('    reshaped heatmap_max normalized: ', gauss_heatmap_max_normalized.shape,' Keras tensor ', KB.is_keras_tensor(gauss_heatmap_max_normalized) )

    ##--------------------------------------------------------------------------------------------
    ## APPEND ALL SCORES TO input score tensor TO YIELD output scores tensor
    ##--------------------------------------------------------------------------------------------
    gauss_scores     = tf.concat([in_tensor, old_style_scores, alt_scores_1, alt_scores_1_norm, alt_scores_2, alt_scores_2_norm],
                                  axis = -1,name = names[0]+'_scores')
    logt('    gauss_scores    : ', gauss_scores, verbose = verbose)
    logt('    complete', verbose = verbose)

    return   gauss_heatmap_sum, gauss_scores  
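
# The max-normalization guard used twice above (replace an all-zero per-map max
# with 1.0 before dividing, so empty heatmaps stay zero instead of becoming NaN)
# as a small standalone helper (illustrative sketch, not part of the original code):
def safe_max_normalize(x):
    import tensorflow as tf
    normalizer = tf.reduce_max(x, axis=[-2, -1], keepdims=True)
    normalizer = tf.where(normalizer < 1.0e-15, tf.ones_like(normalizer), normalizer)
    return x / normalizer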
def build_gt_tensor(gt_class_ids, norm_gt_bboxes, config):
    verbose = config.VERBOSE
    batch_size = config.BATCH_SIZE
    num_classes = config.NUM_CLASSES
    h, w = config.IMAGE_SHAPE[:2]
    det_per_class = config.DETECTION_PER_CLASS
    num_bboxes = KB.int_shape(norm_gt_bboxes)[1]

    scale = tf.constant([h, w, h, w], dtype=tf.float32)
    # dup_scale       = tf.reshape(tf.tile(scale, [num_rois]),[num_rois,-1])
    dup_scale = scale * tf.ones([batch_size, num_bboxes, 1], dtype='float32')
    gt_bboxes = tf.multiply(norm_gt_bboxes, dup_scale)

    # the number of bounding boxes is determined by norm_gt_bboxes.shape[1] instead of config.DETECTION_MAX_INSTANCES,
    # which allows this routine to be used for both input_gt_boxes and target_gt_deltas
    if num_bboxes == config.DETECTION_MAX_INSTANCES:
        tensor_name = "gt_tensor_max"
    else:
        tensor_name = "gt_tensor"

    if verbose:
        print('\n')
        print('  > BUILD_GT_TENSOR()')
        print('    num_bboxes             : ', num_bboxes, '(building ',
              tensor_name, ')')
        print('    gt_class_ids shape     : ', gt_class_ids.get_shape(), '  ',
              KB.int_shape(gt_class_ids))
        print('    norm_gt_bboxes.shape   : ', norm_gt_bboxes.get_shape(),
              '  ', KB.int_shape(norm_gt_bboxes))
        print('    gt_bboxes.shape        : ', gt_bboxes.get_shape(), '  ',
              KB.int_shape(gt_bboxes))

    #---------------------------------------------------------------------------
    # mask identifies rows with class_ids > 0; gt_scores is 1 for these valid
    # (non-background) entries and 0 for padding rows
    #---------------------------------------------------------------------------
    gt_classes_exp = tf.to_float(tf.expand_dims(gt_class_ids, axis=-1))
    logt('gt_classes_exp ', gt_classes_exp, verbose=verbose)

    ones = tf.ones_like(gt_class_ids)
    zeros = tf.zeros_like(gt_class_ids)
    mask = tf.greater(gt_class_ids, 0)

    gt_scores = tf.where(mask, ones, zeros)
    gt_scores_exp = tf.to_float(KB.expand_dims(gt_scores, axis=-1))
    logt('gt_scores_exp  ', gt_scores_exp, verbose=verbose)

    ##------------------------------------------------------------------------------------
    ## Generate GT_ARRAY
    ##    Note that we add gt_scores_exp also at the end, to match the dimensions of
    ##    pred_tensor generated in build_predictions (corresponds to the normalized score)
    ##
    ##    sequence id is used to preserve the order of rois as passed to this routine
    ##------------------------------------------------------------------------------------
    batch_grid, bbox_grid = tf.meshgrid(tf.range(batch_size, dtype=tf.int32),
                                        tf.range(num_bboxes, dtype=tf.int32),
                                        indexing='ij')

    sequence = gt_scores * (bbox_grid[..., ::-1] + 1)
    sequence = tf.to_float(tf.expand_dims(sequence, axis=-1))
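    # bbox_grid is reversed so that earlier rois receive larger sequence ids;
    # the descending top_k sort further below then keeps them in original order.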
    gt_array = tf.concat(
        [gt_bboxes, gt_classes_exp, gt_scores_exp, sequence, gt_scores_exp],
        axis=-1,
        name='gt_array')

    # print('    batch_grid shape  ', batch_grid.get_shape())
    # print('    bbox_grid  shape  ', bbox_grid.get_shape())
    # print('    sequence shape    ', sequence.get_shape())

    ##------------------------------------------------------------------------------
    ## Create indices to scatter rois out to multi-dim tensor by image id and class
    ## resulting tensor is batch_size x num_classes x num_bboxes x 8 (num columns)
    ##------------------------------------------------------------------------------
    scatter_ind = tf.stack([batch_grid, gt_class_ids, bbox_grid], axis=-1)
    gt_scatter = tf.scatter_nd(
        scatter_ind, gt_array,
        [batch_size, num_classes, num_bboxes, gt_array.shape[-1]])

    logt('gt_array    ', gt_array, verbose=verbose)
    logt('scatter_ind ', scatter_ind, verbose=verbose)
    logt('gt_scatter  ', gt_scatter, verbose=verbose)

    ##-------------------------------------------------------------------------------
    ## sort each class dimension based on sequence number (column 6)
    ##     scatter_nd places bboxes in a sparse fashion --- this sort is to place all
    ##     bboxes at the top of each class's bbox array
    ##-------------------------------------------------------------------------------
    _, sort_inds = tf.nn.top_k(tf.abs(gt_scatter[:, :, :, 6]),
                               k=gt_scatter.shape[2])

    # build indexes to gather rows from pred_scatter based on sort order
    class_grid, batch_grid, bbox_grid = tf.meshgrid(tf.range(num_classes),
                                                    tf.range(batch_size),
                                                    tf.range(num_bboxes))
    bbox_grid_exp = tf.to_float(tf.expand_dims(bbox_grid, axis=-1))

    gather_inds = tf.stack([batch_grid, class_grid, sort_inds], axis=-1)
    gt_tensor = tf.gather_nd(gt_scatter, gather_inds, name=tensor_name)
    # append an index to the end of each row --- commented out 30-04-2018
    # gt_tensor = tf.concat([gt_tensor, bbox_grid_exp], axis = -1)
    logt('sort_inds   ', sort_inds, verbose=verbose)
    logt('class_grid  ', class_grid, verbose=verbose)
    logt('batch_grid  ', batch_grid, verbose=verbose)
    logt('gather_inds ', gather_inds, verbose=verbose)
    logt('gt_tensor   ', gt_tensor, verbose=verbose)

    return gt_tensor
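
# The sort used above (top_k on the sequence column, then gather_nd) in a small
# standalone form: move the non-zero rows of a sparse per-class array to the top
# while preserving their original order (toy shapes, TF 1.x; sort_demo is
# illustrative only, not part of the original code):
def sort_demo():
    import numpy as np
    import tensorflow as tf
    batch, classes, boxes = 1, 2, 4
    data = np.zeros((batch, classes, boxes, 2), dtype=np.float32)
    data[0, 1, 1] = [7.0, 2.0]        # column 1 plays the role of the sequence id
    data[0, 1, 3] = [9.0, 1.0]        # later roi, smaller sequence id
    t = tf.constant(data)
    _, sort_inds = tf.nn.top_k(tf.abs(t[:, :, :, 1]), k=boxes)
    class_grid, batch_grid, bbox_grid = tf.meshgrid(
        tf.range(classes), tf.range(batch), tf.range(boxes))
    gather_inds = tf.stack([batch_grid, class_grid, sort_inds], axis=-1)
    sorted_t = tf.gather_nd(t, gather_inds)
    with tf.Session() as sess:
        return sess.run(sorted_t)     # the two non-zero rows now sit at the top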