Example #1
def inference(self):
    """
    Forward pass. Returns the final detection boxes, scores and categories.
    """
    if self.is_training:
        # list as many types of layers as possible, even if they are not used now
        with slim.arg_scope(self.fpn_arg_scope()):
            final_bbox, final_scores, final_category = self.fpn(img_batch=self.images_batch,
                                                                gtboxes_batch=self.gtboxes_batch)
        self.losses()
        # ------add detect summary----------------
        gtboxes_and_label = tf.reshape(self.gtboxes_batch, [-1, 5])
        gtboxes_in_img = show_box_in_tensor.draw_boxes_with_categories(img_batch=self.images_batch,
                                                                       boxes=gtboxes_and_label[:, :-1],
                                                                       labels=gtboxes_and_label[:, -1])
        if cfgs.ADD_BOX_IN_TENSORBOARD:
            detections_in_img = show_box_in_tensor.draw_boxes_with_categories_and_scores(
                img_batch=self.images_batch,
                boxes=final_bbox,
                labels=final_category,
                scores=final_scores)
            tf.summary.image('Compare/final_detection', detections_in_img)
        tf.summary.image('Compare/gtboxes', gtboxes_in_img)
    else:
        final_bbox, final_scores, final_category = self.fpn(img_batch=self.images_batch,
                                                            gtboxes_batch=self.gtboxes_batch)
    return final_bbox, final_scores, final_category
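Example #1 assumes an fpn_arg_scope() helper on the same class. A minimal sketch of what such an arg scope might provide, assuming slim-style layers and an L2 weight decay (the repo's real helper may set more defaults):

import tensorflow as tf
import tensorflow.contrib.slim as slim

def fpn_arg_scope(self, weight_decay=0.0001):
    # Shared defaults for every conv layer built inside self.fpn():
    # L2-regularized weights and zero-initialized biases. Sketch only.
    with slim.arg_scope(
            [slim.conv2d],
            weights_regularizer=slim.l2_regularizer(weight_decay),
            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
            biases_initializer=tf.constant_initializer(0.0)) as sc:
        return sc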
Example #2
def train():

    with tf.Graph().as_default(), tf.device('/cpu:0'):

        num_gpu = len(cfgs.GPU_GROUP.strip().split(','))
        global_step = slim.get_or_create_global_step()
        lr = warmup_lr(cfgs.LR, global_step, cfgs.WARM_SETP, num_gpu)
        tf.summary.scalar('lr', lr)

        optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)
        retinanet = build_whole_network.DetectionNetwork(
            base_network_name=cfgs.NET_NAME, is_training=True)

        with tf.name_scope('get_batch'):
            if cfgs.IMAGE_PYRAMID:
                shortside_len_list = tf.constant(cfgs.IMG_SHORT_SIDE_LEN)
                shortside_len = tf.random_shuffle(shortside_len_list)[0]

            else:
                shortside_len = cfgs.IMG_SHORT_SIDE_LEN

            img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch, img_h_batch, img_w_batch = \
                next_batch(dataset_name=cfgs.DATASET_NAME,
                           batch_size=cfgs.BATCH_SIZE * num_gpu,
                           shortside_len=shortside_len,
                           is_training=True)

        # data processing
        inputs_list = []
        for i in range(num_gpu):
            img = tf.expand_dims(img_batch[i], axis=0)
            if cfgs.NET_NAME in [
                    'resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d'
            ]:
                img = img / tf.constant([cfgs.PIXEL_STD])

            gtboxes_and_label_r = tf.py_func(backward_convert,
                                             inp=[gtboxes_and_label_batch[i]],
                                             Tout=tf.float32)
            gtboxes_and_label_r = tf.reshape(gtboxes_and_label_r, [-1, 6])

            gtboxes_and_label_h = get_horizen_minAreaRectangle(
                gtboxes_and_label_batch[i])
            gtboxes_and_label_h = tf.reshape(gtboxes_and_label_h, [-1, 5])

            num_objects = num_objects_batch[i]
            num_objects = tf.cast(tf.reshape(num_objects, [
                -1,
            ]), tf.float32)

            img_h = img_h_batch[i]
            img_w = img_w_batch[i]

            inputs_list.append([
                img, gtboxes_and_label_h, gtboxes_and_label_r, num_objects,
                img_h, img_w
            ])

        tower_grads = []
        biases_regularizer = tf.no_regularizer
        weights_regularizer = tf.contrib.layers.l2_regularizer(
            cfgs.WEIGHT_DECAY)

        total_loss_dict = {
            'cls_loss': tf.constant(0., tf.float32),
            'reg_loss': tf.constant(0., tf.float32),
            'total_losses': tf.constant(0., tf.float32),
        }

        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(num_gpu):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('tower_%d' % i):
                        with slim.arg_scope(
                            [slim.model_variable, slim.variable],
                                device='/device:CPU:0'):
                            with slim.arg_scope(
                                [
                                    slim.conv2d, slim.conv2d_in_plane,
                                    slim.conv2d_transpose,
                                    slim.separable_conv2d, slim.fully_connected
                                ],
                                    weights_regularizer=weights_regularizer,
                                    biases_regularizer=biases_regularizer,
                                    biases_initializer=tf.constant_initializer(
                                        0.0)):

                                gtboxes_and_label_h, gtboxes_and_label_r = tf.py_func(
                                    get_gtboxes_and_label,
                                    inp=[
                                        inputs_list[i][1], inputs_list[i][2],
                                        inputs_list[i][3]
                                    ],
                                    Tout=[tf.float32, tf.float32])
                                gtboxes_and_label_h = tf.reshape(
                                    gtboxes_and_label_h, [-1, 5])
                                gtboxes_and_label_r = tf.reshape(
                                    gtboxes_and_label_r, [-1, 6])

                                img = inputs_list[i][0]
                                img_shape = inputs_list[i][-2:]
                                img = tf.image.crop_to_bounding_box(
                                    image=img,
                                    offset_height=0,
                                    offset_width=0,
                                    target_height=tf.cast(
                                        img_shape[0], tf.int32),
                                    target_width=tf.cast(
                                        img_shape[1], tf.int32))

                                outputs = retinanet.build_whole_detection_network(
                                    input_img_batch=img,
                                    gtboxes_batch_h=gtboxes_and_label_h,
                                    gtboxes_batch_r=gtboxes_and_label_r,
                                    gpu_id=i)
                                gtboxes_in_img_h = draw_boxes_with_categories(
                                    img_batch=img,
                                    boxes=gtboxes_and_label_h[:, :-1],
                                    labels=gtboxes_and_label_h[:, -1],
                                    method=0)
                                gtboxes_in_img_r = draw_boxes_with_categories(
                                    img_batch=img,
                                    boxes=gtboxes_and_label_r[:, :-1],
                                    labels=gtboxes_and_label_r[:, -1],
                                    method=1)
                                tf.summary.image(
                                    'Compare/gtboxes_h_gpu:%d' % i,
                                    gtboxes_in_img_h)
                                tf.summary.image(
                                    'Compare/gtboxes_r_gpu:%d' % i,
                                    gtboxes_in_img_r)

                                if cfgs.ADD_BOX_IN_TENSORBOARD:
                                    detections_in_img = draw_boxes_with_categories_and_scores(
                                        img_batch=img,
                                        boxes=outputs[0],
                                        scores=outputs[1],
                                        labels=outputs[2],
                                        method=1)
                                    tf.summary.image(
                                        'Compare/final_detection_gpu:%d' % i,
                                        detections_in_img)

                                loss_dict = outputs[-1]

                                total_losses = 0.0
                                for k in loss_dict.keys():
                                    total_losses += loss_dict[k]
                                    total_loss_dict[
                                        k] += loss_dict[k] / num_gpu

                                total_losses /= num_gpu
                                total_loss_dict['total_losses'] += total_losses

                                if i == num_gpu - 1:
                                    regularization_losses = tf.get_collection(
                                        tf.GraphKeys.REGULARIZATION_LOSSES)
                                    # weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses())
                                    total_losses = total_losses + tf.add_n(
                                        regularization_losses)

                        tf.get_variable_scope().reuse_variables()
                        grads = optimizer.compute_gradients(total_losses)
                        if cfgs.GRADIENT_CLIPPING_BY_NORM is not None:
                            grads = slim.learning.clip_gradient_norms(
                                grads, cfgs.GRADIENT_CLIPPING_BY_NORM)
                        tower_grads.append(grads)

        for k in total_loss_dict.keys():
            tf.summary.scalar('{}/{}'.format(k.split('_')[0], k),
                              total_loss_dict[k])

        if len(tower_grads) > 1:
            grads = sum_gradients(tower_grads)
        else:
            grads = tower_grads[0]

        if cfgs.MUTILPY_BIAS_GRADIENT is not None:
            final_gvs = []
            with tf.variable_scope('Gradient_Mult'):
                for grad, var in grads:
                    scale = 1.
                    if '/biases:' in var.name:
                        scale *= cfgs.MUTILPY_BIAS_GRADIENT
                    if 'conv_new' in var.name:
                        scale *= 3.
                    if not np.allclose(scale, 1.0):
                        grad = tf.multiply(grad, scale)

                    final_gvs.append((grad, var))
            apply_gradient_op = optimizer.apply_gradients(
                final_gvs, global_step=global_step)
        else:
            apply_gradient_op = optimizer.apply_gradients(
                grads, global_step=global_step)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.9999, global_step)
        variables_averages_op = variable_averages.apply(
            tf.trainable_variables())

        train_op = tf.group(apply_gradient_op, variables_averages_op)
        # train_op = optimizer.apply_gradients(final_gvs, global_step=global_step)
        summary_op = tf.summary.merge_all()

        restorer, restore_ckpt = retinanet.get_restorer()
        saver = tf.train.Saver(max_to_keep=5)

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        tfconfig = tf.ConfigProto(allow_soft_placement=True,
                                  log_device_placement=False)
        tfconfig.gpu_options.allow_growth = True
        with tf.Session(config=tfconfig) as sess:
            sess.run(init_op)

            # sess.run(tf.initialize_all_variables())
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord, sess=sess)

            summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
            tools.mkdir(summary_path)
            summary_writer = tf.summary.FileWriter(summary_path,
                                                   graph=sess.graph)

            if restorer is not None:
                restorer.restore(sess, restore_ckpt)
                print('model restored')

            for step in range(cfgs.MAX_ITERATION // num_gpu):
                training_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                              time.localtime(time.time()))

                if step % cfgs.SHOW_TRAIN_INFO_INTE != 0 and step % cfgs.SMRY_ITER != 0:
                    _, global_stepnp = sess.run([train_op, global_step])

                else:
                    if step % cfgs.SHOW_TRAIN_INFO_INTE == 0 and step % cfgs.SMRY_ITER != 0:
                        start = time.time()

                        _, global_stepnp, total_loss_dict_ = \
                            sess.run([train_op, global_step, total_loss_dict])

                        end = time.time()

                        print('***' * 20)
                        print("""%s: global_step:%d  current_step:%d""" %
                              (training_time,
                               (global_stepnp - 1) * num_gpu, step * num_gpu))
                        print("""per_cost_time:%.3fs""" %
                              ((end - start) / num_gpu))
                        loss_str = ''
                        for k in total_loss_dict_.keys():
                            loss_str += '%s:%.3f\n' % (k, total_loss_dict_[k])
                        print(loss_str)

                        if np.isnan(total_loss_dict_['total_losses']):
                            sys.exit(0)

                    else:
                        if step % cfgs.SMRY_ITER == 0:
                            _, global_stepnp, summary_str = sess.run(
                                [train_op, global_step, summary_op])
                            summary_writer.add_summary(
                                summary_str, (global_stepnp - 1) * num_gpu)
                            summary_writer.flush()

                if (step > 0 and step % (cfgs.SAVE_WEIGHTS_INTE // num_gpu)
                        == 0) or (step >= cfgs.MAX_ITERATION // num_gpu - 1):

                    save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                    if not os.path.exists(save_dir):
                        os.mkdir(save_dir)

                    save_ckpt = os.path.join(
                        save_dir, '{}_'.format(cfgs.DATASET_NAME) + str(
                            (global_stepnp - 1) * num_gpu) + 'model.ckpt')
                    saver.save(sess, save_ckpt)
                    print('weights have been saved')

            coord.request_stop()
            coord.join(threads)
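Example #2 relies on a warmup_lr() helper that is not shown. A plausible sketch, assuming a linear ramp from cfgs.LR / 10. up to cfgs.LR over cfgs.WARM_SETP updates followed by piecewise decay driven by the same cfgs config module (the repo's actual schedule may differ):

import numpy as np
import tensorflow as tf

def warmup_lr(init_lr, global_step, warmup_step, num_gpu):
    # Linearly ramp the learning rate during the first warmup_step updates,
    # then switch to piecewise-constant decay. DECAY_STEP boundaries are
    # divided by num_gpu because every update consumes num_gpu images.
    def warmup(start_lr, end_lr, step):
        return start_lr + (end_lr - start_lr) * tf.cast(step, tf.float32) / float(warmup_step)

    def decay(lr, step):
        return tf.train.piecewise_constant(
            step,
            boundaries=[np.int64(s // num_gpu) for s in cfgs.DECAY_STEP],
            values=[lr, lr / 10., lr / 100.])

    return tf.cond(tf.less(global_step, warmup_step),
                   true_fn=lambda: warmup(init_lr / 10., init_lr, global_step),
                   false_fn=lambda: decay(init_lr, global_step))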
Example #3
def train():

    faster_rcnn = build_whole_network.DetectionNetwork(
        base_network_name=cfgs.NET_NAME, is_training=True)

    with tf.name_scope('get_batch'):
        img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch = \
            next_batch(dataset_name=cfgs.DATASET_NAME,  # 'pascal', 'coco'
                       batch_size=cfgs.BATCH_SIZE,
                       shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                       is_training=True)
        gtboxes_and_label = tf.reshape(gtboxes_and_label_batch, [-1, 5])

    biases_regularizer = tf.no_regularizer
    weights_regularizer = tf.contrib.layers.l2_regularizer(cfgs.WEIGHT_DECAY)

    # list as many types of layers as possible, even if they are not used now
    with slim.arg_scope([slim.conv2d, slim.conv2d_in_plane, \
                         slim.conv2d_transpose, slim.separable_conv2d, slim.fully_connected],
                        weights_regularizer=weights_regularizer,
                        biases_regularizer=biases_regularizer,
                        biases_initializer=tf.constant_initializer(0.0)):
        final_bbox, final_scores, final_category, loss_dict = faster_rcnn.build_whole_detection_network(
            input_img_batch=img_batch, gtboxes_batch=gtboxes_and_label)

    # ----------------------------------------------------------------------------------------------------build loss
    weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses())
    rpn_location_loss = loss_dict['rpn_loc_loss']
    rpn_cls_loss = loss_dict['rpn_cls_loss']
    rpn_total_loss = rpn_location_loss + rpn_cls_loss

    fastrcnn_cls_loss = loss_dict['fastrcnn_cls_loss']
    fastrcnn_loc_loss = loss_dict['fastrcnn_loc_loss']
    fastrcnn_total_loss = fastrcnn_cls_loss + fastrcnn_loc_loss

    total_loss = rpn_total_loss + fastrcnn_total_loss + weight_decay_loss
    # ____________________________________________________________________________________________________build loss

    # ---------------------------------------------------------------------------------------------------add summary

    tf.summary.scalar('RPN_LOSS/cls_loss', rpn_cls_loss)
    tf.summary.scalar('RPN_LOSS/location_loss', rpn_location_loss)
    tf.summary.scalar('RPN_LOSS/rpn_total_loss', rpn_total_loss)

    tf.summary.scalar('FAST_LOSS/fastrcnn_cls_loss', fastrcnn_cls_loss)
    tf.summary.scalar('FAST_LOSS/fastrcnn_location_loss', fastrcnn_loc_loss)
    tf.summary.scalar('FAST_LOSS/fastrcnn_total_loss', fastrcnn_total_loss)

    tf.summary.scalar('LOSS/total_loss', total_loss)
    tf.summary.scalar('LOSS/regular_weights', weight_decay_loss)

    gtboxes_in_img = show_box_in_tensor.draw_boxes_with_categories(
        img_batch=img_batch,
        boxes=gtboxes_and_label[:, :-1],
        labels=gtboxes_and_label[:, -1])
    if cfgs.ADD_BOX_IN_TENSORBOARD:
        detections_in_img = show_box_in_tensor.draw_boxes_with_categories_and_scores(
            img_batch=img_batch,
            boxes=final_bbox,
            labels=final_category,
            scores=final_scores)
        tf.summary.image('Compare/final_detection', detections_in_img)
    tf.summary.image('Compare/gtboxes', gtboxes_in_img)

    # ___________________________________________________________________________________________________add summary

    global_step = slim.get_or_create_global_step()
    lr = tf.train.piecewise_constant(
        global_step,
        boundaries=[
            np.int64(cfgs.DECAY_STEP[0]),
            np.int64(cfgs.DECAY_STEP[1])
        ],
        values=[cfgs.LR, cfgs.LR / 10., cfgs.LR / 100.])
    tf.summary.scalar('lr', lr)
    optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)
    # optimizer = tf.train.AdamOptimizer(lr)

    # ---------------------------------------------------------------------------------------------compute gradients
    gradients = faster_rcnn.get_gradients(optimizer, total_loss)

    # enlarge_gradients for bias
    if cfgs.MUTILPY_BIAS_GRADIENT:
        gradients = faster_rcnn.enlarge_gradients_for_bias(gradients)

    if cfgs.GRADIENT_CLIPPING_BY_NORM:
        with tf.name_scope('clip_gradients_YJR'):
            gradients = slim.learning.clip_gradient_norms(
                gradients, cfgs.GRADIENT_CLIPPING_BY_NORM)
    # _____________________________________________________________________________________________compute gradients

    # train_op
    train_op = optimizer.apply_gradients(grads_and_vars=gradients,
                                         global_step=global_step)
    summary_op = tf.summary.merge_all()
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = faster_rcnn.get_restorer()
    saver = tf.train.Saver(max_to_keep=30)

    # Create session
    # allow_soft_placement=True lets TensorFlow fall back to the CPU for ops that cannot be placed on a GPU
    config = tf.ConfigProto(allow_soft_placement=True)
    # allocate GPU memory on demand instead of reserving a GPU's entire memory up front
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('model restored')
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord)

        # directory where TensorBoard summaries are saved
        summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
        tools.mkdir(summary_path)
        summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)
        timer = Timer()
        for step in range(cfgs.MAX_ITERATION):
            training_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                          time.localtime(time.time()))

            if step % cfgs.SHOW_TRAIN_INFO_INTE != 0 and step % cfgs.SMRY_ITER != 0:
                _, global_stepnp = sess.run([train_op, global_step])

            else:
                if step % cfgs.SHOW_TRAIN_INFO_INTE == 0 and step % cfgs.SMRY_ITER != 0:

                    start = time.time()
                    _, global_stepnp, img_name, rpnLocLoss, rpnClsLoss, rpnTotalLoss, \
                    fastrcnnLocLoss, fastrcnnClsLoss, fastrcnnTotalLoss, totalLoss = \
                        sess.run(
                            [train_op, global_step, img_name_batch, rpn_location_loss, rpn_cls_loss, rpn_total_loss,
                             fastrcnn_loc_loss, fastrcnn_cls_loss, fastrcnn_total_loss, total_loss])
                    end = time.time()

                    print('{}: step:{}/{}iter'.format(training_time,
                                                      global_stepnp,
                                                      cfgs.MAX_ITERATION))
                    print('>>> rpn_loc_loss:{:.4f}  |  rpn_cla_loss:{:.4f}  |  rpn_total_loss:{:.4f}\n'
                          '>>> fast_rcnn_loc_loss:{:.4f}  |  fast_rcnn_cla_loss:{:.4f}  |  fast_rcnn_total_loss:{:.4f}\n'
                          '>>> single_time:{:.4f}s  |   total_loss:{:.4f} '
                          .format(rpnLocLoss, rpnClsLoss, rpnTotalLoss, fastrcnnLocLoss, fastrcnnClsLoss,
                                  fastrcnnTotalLoss, (end - start), totalLoss))
                    print(
                        '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
                    )

                else:
                    if step % cfgs.SMRY_ITER == 0:
                        _, global_stepnp, summary_str = sess.run(
                            [train_op, global_step, summary_op])
                        summary_writer.add_summary(summary_str, global_stepnp)
                        summary_writer.flush()

            if (step > 0 and step % cfgs.SAVE_WEIGHTS_INTE
                    == 0) or (step == cfgs.MAX_ITERATION - 1):

                save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                if not os.path.exists(save_dir):
                    os.mkdir(save_dir)

                save_ckpt = os.path.join(
                    save_dir, 'voc_' + str(global_stepnp) + 'model.ckpt')
                saver.save(sess, save_ckpt)
                print('weights have been saved')

        coord.request_stop()
        coord.join(threads)
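Example #3 defers bias-gradient scaling to faster_rcnn.enlarge_gradients_for_bias(). A sketch of what that method plausibly does, mirroring the inline Gradient_Mult block of Example #2 (an assumption, not the repo's verified implementation):

import numpy as np
import tensorflow as tf

def enlarge_gradients_for_bias(self, gradients):
    # Multiply the gradient of every bias variable by cfgs.MUTILPY_BIAS_GRADIENT
    # and leave all other gradients untouched.
    final_gradients = []
    with tf.variable_scope('Gradient_Mult'):
        for grad, var in gradients:
            scale = 1.0
            if '/biases:' in var.name:
                scale *= cfgs.MUTILPY_BIAS_GRADIENT
            if not np.allclose(scale, 1.0):
                grad = tf.multiply(grad, scale)
            final_gradients.append((grad, var))
    return final_gradients

Example #4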
def train():

    with tf.Graph().as_default(), tf.device('/cpu:0'):

        global_step = slim.get_or_create_global_step()

        lr = warmup_lr(cfgs.LR, global_step, cfgs.WARM_SETP, cfgs.NUM_GPU * cfgs.BATCH_SIZE)
        tf.summary.scalar('lr', lr)

        optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)
        faster_rcnn = build_whole_network.DetectionNetwork(base_network_name=cfgs.NET_NAME,
                                                           is_training=True,
                                                           batch_size=cfgs.BATCH_SIZE)

        with tf.name_scope('get_batch'):
            img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch, img_h_batch, img_w_batch = \
                next_batch(dataset_name=cfgs.DATASET_NAME,  # 'pascal', 'coco'
                           batch_size=cfgs.BATCH_SIZE * cfgs.NUM_GPU,
                           shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                           is_training=True)

        # data processing
        inputs_list = []
        for i in range(cfgs.NUM_GPU):
            start = i * cfgs.BATCH_SIZE
            end = (i + 1) * cfgs.BATCH_SIZE
            img = img_batch[start:end, :, :, :]
            if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']:
                img = img / tf.constant([cfgs.PIXEL_STD])

            gtboxes_and_label = tf.cast(tf.reshape(gtboxes_and_label_batch[start:end, :, :],
                                                   [cfgs.BATCH_SIZE, -1, 5]), tf.float32)
            num_objects = num_objects_batch[start:end]
            num_objects = tf.cast(tf.reshape(num_objects, [cfgs.BATCH_SIZE, -1, ]), tf.float32)

            img_h = img_h_batch[start:end]
            img_w = img_w_batch[start:end]
            # img_h = tf.cast(tf.reshape(img_h, [-1, ]), tf.float32)
            # img_w = tf.cast(tf.reshape(img_w, [-1, ]), tf.float32)

            inputs_list.append([img, gtboxes_and_label, num_objects, img_h, img_w])

        # put_op_list = []
        # get_op_list = []
        # for i in range(cfgs.NUM_GPU):
        #     with tf.device("/GPU:%s" % i):
        #         area = tf.contrib.staging.StagingArea(
        #             dtypes=[tf.float32, tf.float32, tf.float32])
        #         put_op_list.append(area.put(inputs_list[i]))
        #         get_op_list.append(area.get())

        tower_grads = []
        biases_regularizer = tf.no_regularizer
        weights_regularizer = tf.contrib.layers.l2_regularizer(cfgs.WEIGHT_DECAY)

        total_loss_dict = {
            'rpn_cls_loss': tf.constant(0., tf.float32),
            'rpn_bbox_loss': tf.constant(0., tf.float32),
            'rpn_ctr_loss': tf.constant(0., tf.float32),
            'total_losses': tf.constant(0., tf.float32),
        }

        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(cfgs.NUM_GPU):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('tower_%d' % i):
                        with slim.arg_scope(
                                [slim.model_variable, slim.variable],
                                device='/device:CPU:0'):
                            with slim.arg_scope([slim.conv2d, slim.conv2d_in_plane,
                                                 slim.conv2d_transpose, slim.separable_conv2d, slim.fully_connected],
                                                weights_regularizer=weights_regularizer,
                                                biases_regularizer=biases_regularizer,
                                                biases_initializer=tf.constant_initializer(0.0)):

                                gtboxes_and_label = tf.py_func(get_gtboxes_and_label,
                                                               inp=[inputs_list[i][1], inputs_list[i][2]],
                                                               Tout=tf.float32)
                                gtboxes_and_label = tf.reshape(gtboxes_and_label, [cfgs.BATCH_SIZE, -1, 5])

                                img = inputs_list[i][0]
                                img_shape = inputs_list[i][-2:]
                                h_crop = tf.reduce_max(img_shape[0])
                                w_crop = tf.reduce_max(img_shape[1])
                                img = tf.image.crop_to_bounding_box(image=img,
                                                                    offset_height=0,
                                                                    offset_width=0,
                                                                    target_height=tf.cast(h_crop, tf.int32),
                                                                    target_width=tf.cast(w_crop, tf.int32))

                                outputs = faster_rcnn.build_whole_detection_network(input_img_batch=img,
                                                                                    gtboxes_batch=gtboxes_and_label)
                                gtboxes_in_img = show_box_in_tensor.draw_boxes_with_categories(img_batch=img[0, :, :, :],
                                                                                               boxes=gtboxes_and_label[0, :, :-1],
                                                                                               labels=gtboxes_and_label[0, :, -1])
                                gtboxes_in_img = tf.expand_dims(gtboxes_in_img, 0)
                                tf.summary.image('Compare/gtboxes_gpu:%d' % i, gtboxes_in_img)

                                if cfgs.ADD_BOX_IN_TENSORBOARD:
                                    detections_in_img = show_box_in_tensor.draw_boxes_with_categories_and_scores(
                                        img_batch=img[0, :, :, :],
                                        boxes=outputs[0],
                                        scores=outputs[1],
                                        labels=outputs[2])
                                    detections_in_img = tf.expand_dims(detections_in_img, 0)
                                    tf.summary.image('Compare/final_detection_gpu:%d' % i, detections_in_img)

                                loss_dict = outputs[-1]

                                total_losses = 0.0
                                for k in loss_dict.keys():
                                    total_losses += loss_dict[k]
                                    total_loss_dict[k] += loss_dict[k] / cfgs.NUM_GPU

                                total_losses = total_losses / cfgs.NUM_GPU
                                total_loss_dict['total_losses'] += total_losses

                                if i == cfgs.NUM_GPU - 1:
                                    regularization_losses = tf.get_collection(
                                        tf.GraphKeys.REGULARIZATION_LOSSES)
                                    # weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses())
                                    total_losses = total_losses + tf.add_n(regularization_losses)

                        tf.get_variable_scope().reuse_variables()

                        grads = optimizer.compute_gradients(total_losses)
                        tower_grads.append(grads)

        for k in total_loss_dict.keys():
            tf.summary.scalar('{}/{}'.format(k.split('_')[0], k), total_loss_dict[k])

        if len(tower_grads) > 1:
            grads = sum_gradients(tower_grads)
        else:
            grads = tower_grads[0]

        # final_gvs = []
        # with tf.variable_scope('Gradient_Mult'):
        #     for grad, var in grads:
        #         scale = 1.
        #         # if '/biases:' in var.name:
        #         #    scale *= 2.
        #         if 'conv_new' in var.name:
        #             scale *= 3.
        #         if not np.allclose(scale, 1.0):
        #             grad = tf.multiply(grad, scale)
        #         final_gvs.append((grad, var))

        apply_gradient_op = optimizer.apply_gradients(grads, global_step=global_step)

        variable_averages = tf.train.ExponentialMovingAverage(0.9999, global_step)
        variables_averages_op = variable_averages.apply(tf.trainable_variables())

        train_op = tf.group(apply_gradient_op, variables_averages_op)
        # train_op = optimizer.apply_gradients(final_gvs, global_step=global_step)
        summary_op = tf.summary.merge_all()

        restorer, restore_ckpt = faster_rcnn.get_restorer()
        saver = tf.train.Saver(max_to_keep=10)

        init_op = tf.group(
            tf.global_variables_initializer(),
            tf.local_variables_initializer()
        )

        tfconfig = tf.ConfigProto(
            allow_soft_placement=True, log_device_placement=False)
        tfconfig.gpu_options.allow_growth = True

        num_per_iter = cfgs.NUM_GPU * cfgs.BATCH_SIZE
        with tf.Session(config=tfconfig) as sess:
            sess.run(init_op)

            # sess.run(tf.initialize_all_variables())
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord, sess=sess)

            summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
            tools.mkdir(summary_path)
            summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)

            if restorer is not None:
                restorer.restore(sess, restore_ckpt)
                print('model restored')

            for step in range(cfgs.MAX_ITERATION // num_per_iter):
                training_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))

                if step % cfgs.SHOW_TRAIN_INFO_INTE != 0 and step % cfgs.SMRY_ITER != 0:
                    _, global_stepnp = sess.run([train_op, global_step])

                else:
                    if step % cfgs.SHOW_TRAIN_INFO_INTE == 0 and step % cfgs.SMRY_ITER != 0:
                        start = time.time()

                        _, global_stepnp, total_loss_dict_ = \
                            sess.run([train_op, global_step, total_loss_dict])

                        end = time.time()

                        print('***' * 20)
                        print("""%s: global_step:%d  current_step:%d"""
                              % (training_time, (global_stepnp - 1) * num_per_iter, step * num_per_iter))
                        print("""per_cost_time:%.3fs"""
                              % ((end - start) / num_per_iter))
                        loss_str = ''
                        for k in total_loss_dict_.keys():
                            loss_str += '%s:%.3f\n' % (k, total_loss_dict_[k])
                        print(loss_str)

                    else:
                        if step % cfgs.SMRY_ITER == 0:
                            _, global_stepnp, summary_str = sess.run([train_op, global_step, summary_op])
                            summary_writer.add_summary(summary_str, (global_stepnp-1)*num_per_iter)
                            summary_writer.flush()

                if (step > 0 and step % (cfgs.SAVE_WEIGHTS_INTE // num_per_iter) == 0) or (step >= cfgs.MAX_ITERATION // num_per_iter - 1):

                    save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                    if not os.path.exists(save_dir):
                        os.mkdir(save_dir)

                    save_ckpt = os.path.join(save_dir, 'coco_' + str((global_stepnp - 1) * num_per_iter) + 'model.ckpt')
                    saver.save(sess, save_ckpt)
                    print('weights have been saved')

            coord.request_stop()
            coord.join(threads)
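Both multi-GPU scripts (Examples #2 and #4) call sum_gradients(tower_grads) without defining it. A minimal sketch following the standard multi-tower pattern, which averages each variable's gradient across towers (the averaging, despite the name, is an assumption about the repo's helper):

import tensorflow as tf

def sum_gradients(tower_grads):
    # tower_grads holds one list of (grad, var) pairs per GPU, with every
    # tower sharing the same variable order. Return one combined list.
    averaged_grads = []
    for grads_and_vars in zip(*tower_grads):
        grads = [tf.expand_dims(g, 0) for g, _ in grads_and_vars if g is not None]
        grad = tf.reduce_mean(tf.concat(grads, axis=0), axis=0)
        averaged_grads.append((grad, grads_and_vars[0][1]))
    return averaged_grads

Example #5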
def train():

    faster_rcnn = build_whole_network.DetectionNetwork(base_network_name=cfgs.NET_NAME,
                                                       is_training=True)

    with tf.name_scope('get_batch'):
        img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])
        gtbox_plac = tf.placeholder(dtype=tf.int32, shape=[None, 5])

        img_batch, gtboxes_and_label = preprocess_img(img_plac, gtbox_plac)
        # gtboxes_and_label = tf.reshape(gtboxes_and_label_batch, [-1, 5])

    biases_regularizer = tf.no_regularizer
    weights_regularizer = tf.contrib.layers.l2_regularizer(cfgs.WEIGHT_DECAY)

    # list as many types of layers as possible, even if they are not used now
    with slim.arg_scope([slim.conv2d, slim.conv2d_in_plane, \
                         slim.conv2d_transpose, slim.separable_conv2d, slim.fully_connected],
                        weights_regularizer=weights_regularizer,
                        biases_regularizer=biases_regularizer,
                        biases_initializer=tf.constant_initializer(0.0)):
        final_bbox, final_scores, final_category, loss_dict = faster_rcnn.build_whole_detection_network(
            input_img_batch=img_batch,
            gtboxes_batch=gtboxes_and_label)

    # ----------------------------------------------------------------------------------------------------build loss
    weight_decay_loss = 0  # tf.add_n(slim.losses.get_regularization_losses())
    rpn_location_loss = loss_dict['rpn_loc_loss']
    rpn_cls_loss = loss_dict['rpn_cls_loss']
    rpn_total_loss = rpn_location_loss + rpn_cls_loss

    fastrcnn_cls_loss = loss_dict['fastrcnn_cls_loss']
    fastrcnn_loc_loss = loss_dict['fastrcnn_loc_loss']
    fastrcnn_total_loss = fastrcnn_cls_loss + fastrcnn_loc_loss

    total_loss = rpn_total_loss + fastrcnn_total_loss + weight_decay_loss
    # ____________________________________________________________________________________________________build loss



    # ---------------------------------------------------------------------------------------------------add summary
    tf.summary.scalar('RPN_LOSS/cls_loss', rpn_cls_loss)
    tf.summary.scalar('RPN_LOSS/location_loss', rpn_location_loss)
    tf.summary.scalar('RPN_LOSS/rpn_total_loss', rpn_total_loss)

    tf.summary.scalar('FAST_LOSS/fastrcnn_cls_loss', fastrcnn_cls_loss)
    tf.summary.scalar('FAST_LOSS/fastrcnn_location_loss', fastrcnn_loc_loss)
    tf.summary.scalar('FAST_LOSS/fastrcnn_total_loss', fastrcnn_total_loss)

    tf.summary.scalar('LOSS/total_loss', total_loss)
    tf.summary.scalar('LOSS/regular_weights', weight_decay_loss)

    gtboxes_in_img = show_box_in_tensor.draw_boxes_with_categories(img_batch=img_batch,
                                                                   boxes=gtboxes_and_label[:, :-1],
                                                                   labels=gtboxes_and_label[:, -1])
    if cfgs.ADD_BOX_IN_TENSORBOARD:
        detections_in_img = show_box_in_tensor.draw_boxes_with_categories_and_scores(img_batch=img_batch,
                                                                                     boxes=final_bbox,
                                                                                     labels=final_category,
                                                                                     scores=final_scores)
        tf.summary.image('Compare/final_detection', detections_in_img)
    tf.summary.image('Compare/gtboxes', gtboxes_in_img)

    # ___________________________________________________________________________________________________add summary

    global_step = slim.get_or_create_global_step()
    lr = tf.train.piecewise_constant(global_step,
                                     boundaries=[np.int64(cfgs.DECAY_STEP[0]), np.int64(cfgs.DECAY_STEP[1])],
                                     values=[cfgs.LR, cfgs.LR / 10., cfgs.LR / 100.])
    tf.summary.scalar('lr', lr)
    optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)

    # ---------------------------------------------------------------------------------------------compute gradients
    gradients = faster_rcnn.get_gradients(optimizer, total_loss)

    # enlarge_gradients for bias
    if cfgs.MUTILPY_BIAS_GRADIENT:
        gradients = faster_rcnn.enlarge_gradients_for_bias(gradients)

    if cfgs.GRADIENT_CLIPPING_BY_NORM:
        with tf.name_scope('clip_gradients_YJR'):
            gradients = slim.learning.clip_gradient_norms(gradients,
                                                          cfgs.GRADIENT_CLIPPING_BY_NORM)
    # _____________________________________________________________________________________________compute gradients



    # train_op
    train_op = optimizer.apply_gradients(grads_and_vars=gradients,
                                         global_step=global_step)
    summary_op = tf.summary.merge_all()
    init_op = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    )

    restorer, restore_ckpt = faster_rcnn.get_restorer()
    saver = tf.train.Saver(max_to_keep=30)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('model restored')

        summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
        tools.mkdir(summary_path)
        summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)

        for step in range(cfgs.MAX_ITERATION):

            img_id, img, gt_info = next_img(step=step)
            training_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))

            if step % cfgs.SHOW_TRAIN_INFO_INTE != 0 and step % cfgs.SMRY_ITER != 0:
                _, global_stepnp = sess.run([train_op, global_step],
                                            feed_dict={img_plac: img,
                                                       gtbox_plac: gt_info}
                                            )

            else:
                if step % cfgs.SHOW_TRAIN_INFO_INTE == 0 and step % cfgs.SMRY_ITER != 0:
                    start = time.time()

                    _, global_stepnp, rpnLocLoss, rpnClsLoss, rpnTotalLoss, \
                    fastrcnnLocLoss, fastrcnnClsLoss, fastrcnnTotalLoss, totalLoss = \
                        sess.run(
                            [train_op, global_step, rpn_location_loss, rpn_cls_loss, rpn_total_loss,
                             fastrcnn_loc_loss, fastrcnn_cls_loss, fastrcnn_total_loss, total_loss],
                        feed_dict={img_plac: img,
                                   gtbox_plac: gt_info})
                    end = time.time()
                    print(""" {}: step{}    image_name:{} |\t
                              rpn_loc_loss:{} |\t rpn_cla_loss:{} |\t rpn_total_loss:{} |
                              fast_rcnn_loc_loss:{} |\t fast_rcnn_cla_loss:{} |\t fast_rcnn_total_loss:{} |
                              total_loss:{} |\t per_cost_time:{}s""" \
                          .format(training_time, global_stepnp, str(img_id), rpnLocLoss, rpnClsLoss,
                                  rpnTotalLoss, fastrcnnLocLoss, fastrcnnClsLoss, fastrcnnTotalLoss, totalLoss,
                                  (end - start)))
                else:
                    if step % cfgs.SMRY_ITER == 0:
                        _, global_stepnp, summary_str = sess.run([train_op, global_step, summary_op],
                                                                 feed_dict={img_plac: img,
                                                                            gtbox_plac: gt_info}
                                                                 )
                        summary_writer.add_summary(summary_str, global_stepnp)
                        summary_writer.flush()

            if (step > 0 and step % cfgs.SAVE_WEIGHTS_INTE == 0) or (step == cfgs.MAX_ITERATION - 1):

                save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                if not os.path.exists(save_dir):
                    os.mkdir(save_dir)

                save_ckpt = os.path.join(save_dir, 'voc_' + str(global_stepnp) + 'model.ckpt')
                saver.save(sess, save_ckpt)
                print('weights have been saved')
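Example #5 feeds raw images through placeholders and a preprocess_img() helper that is not shown. A rough sketch under common assumptions: resize so the short side hits a fixed length, rescale the boxes, subtract per-channel means, and add a batch dimension. The short-side length, the mean values, and the [x1, y1, x2, y2, label] box layout are guesses, not the repo's confirmed settings:

import tensorflow as tf

def preprocess_img(img_plac, gtbox_plac, shortside_len=600):
    img = tf.cast(img_plac, tf.float32)
    h = tf.cast(tf.shape(img)[0], tf.float32)
    w = tf.cast(tf.shape(img)[1], tf.float32)
    scale = float(shortside_len) / tf.minimum(h, w)
    new_h = tf.cast(h * scale, tf.int32)
    new_w = tf.cast(w * scale, tf.int32)
    img = tf.image.resize_bilinear(tf.expand_dims(img, 0), [new_h, new_w])
    img = img - tf.constant([103.939, 116.779, 123.68])  # assumed BGR means

    # Rescale box coordinates to the resized image; keep the label column.
    gtboxes = tf.cast(gtbox_plac, tf.float32)
    gtboxes = tf.concat([gtboxes[:, :-1] * scale, gtboxes[:, -1:]], axis=1)
    return img, gtboxes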
Example #6
def tower_loss(scope):
    with tf.name_scope(scope):
        faster_rcnn = build_whole_network.DetectionNetwork(
            base_network_name=cfgs.NET_NAME, is_training=True)

        with tf.name_scope('get_batch'):
            img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch = \
                next_batch(dataset_name=cfgs.DATASET_NAME,  # 'pascal', 'coco'
                           batch_size=cfgs.BATCH_SIZE,
                           shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                           is_training=True)
            gtboxes_and_label = tf.reshape(gtboxes_and_label_batch, [-1, 5])

        biases_regularizer = tf.no_regularizer
        weights_regularizer = tf.contrib.layers.l2_regularizer(
            cfgs.WEIGHT_DECAY)

        # list as many types of layers as possible, even if they are not used now
        with slim.arg_scope([
                slim.conv2d, slim.conv2d_in_plane, slim.conv2d_transpose,
                slim.separable_conv2d, slim.fully_connected
        ],
                            weights_regularizer=weights_regularizer,
                            biases_regularizer=biases_regularizer,
                            biases_initializer=tf.constant_initializer(0.0)):
            final_bbox, final_scores, final_category, loss_dict = faster_rcnn.build_whole_detection_network(
                input_img_batch=img_batch, gtboxes_batch=gtboxes_and_label)

        # ----------------------------------------------------------------------------------------------------build loss
        weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses())
        rpn_location_loss = loss_dict['rpn_loc_loss']
        rpn_cls_loss = loss_dict['rpn_cls_loss']
        rpn_total_loss = rpn_location_loss + rpn_cls_loss

        fastrcnn_cls_loss = loss_dict['fastrcnn_cls_loss']
        fastrcnn_loc_loss = loss_dict['fastrcnn_loc_loss']
        fastrcnn_total_loss = fastrcnn_cls_loss + fastrcnn_loc_loss

        total_loss = rpn_total_loss + fastrcnn_total_loss + weight_decay_loss
        # ____________________________________________________________________________________________________build loss

        # ---------------------------------------------------------------------------------------------------add summary
        tf.summary.scalar('RPN_LOSS/cls_loss', rpn_cls_loss)
        tf.summary.scalar('RPN_LOSS/location_loss', rpn_location_loss)
        tf.summary.scalar('RPN_LOSS/rpn_total_loss', rpn_total_loss)

        tf.summary.scalar('FAST_LOSS/fastrcnn_cls_loss', fastrcnn_cls_loss)
        tf.summary.scalar('FAST_LOSS/fastrcnn_location_loss',
                          fastrcnn_loc_loss)
        tf.summary.scalar('FAST_LOSS/fastrcnn_total_loss', fastrcnn_total_loss)

        tf.summary.scalar('LOSS/total_loss', total_loss)
        tf.summary.scalar('LOSS/regular_weights', weight_decay_loss)

        gtboxes_in_img = show_box_in_tensor.draw_boxes_with_categories(
            img_batch=img_batch,
            boxes=gtboxes_and_label[:, :-1],
            labels=gtboxes_and_label[:, -1])
        if cfgs.ADD_BOX_IN_TENSORBOARD:
            detections_in_img = show_box_in_tensor.draw_boxes_with_categories_and_scores(
                img_batch=img_batch,
                boxes=final_bbox,
                labels=final_category,
                scores=final_scores)
            tf.summary.image('Compare/final_detection', detections_in_img)
        tf.summary.image('Compare/gtboxes', gtboxes_in_img)

        return total_loss, faster_rcnn, img_name_batch, rpn_location_loss, rpn_cls_loss, rpn_total_loss,\
               fastrcnn_loc_loss, fastrcnn_cls_loss, fastrcnn_total_loss
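tower_loss() builds the per-GPU graph and returns its loss, so it implies the classic multi-tower driver. A minimal sketch of that loop, following the same pattern as Examples #2 and #4 (the original script's driver is not shown):

import tensorflow as tf
import tensorflow.contrib.slim as slim

global_step = slim.get_or_create_global_step()
optimizer = tf.train.MomentumOptimizer(cfgs.LR, momentum=cfgs.MOMENTUM)

tower_grads = []
with tf.variable_scope(tf.get_variable_scope()):
    for i in range(cfgs.NUM_GPU):
        with tf.device('/gpu:%d' % i):
            # Build one tower and reuse variables for every tower after the first.
            outputs = tower_loss('tower_%d' % i)
            total_loss = outputs[0]
            tf.get_variable_scope().reuse_variables()
            tower_grads.append(optimizer.compute_gradients(total_loss))

grads = sum_gradients(tower_grads)  # averaging helper as sketched earlier
train_op = optimizer.apply_gradients(grads, global_step=global_step)

Example #7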
    def build_whole_detection_network(self, input_img_batch, gtboxes_r_batch,
                                      gtboxes_h_batch):

        if self.is_training:
            # ensure shape is [M, 5] and [M, 6]
            gtboxes_r_batch = tf.reshape(gtboxes_r_batch, [-1, 6])
            gtboxes_h_batch = tf.reshape(gtboxes_h_batch, [-1, 5])
            gtboxes_r_batch = tf.cast(gtboxes_r_batch, tf.float32)
            gtboxes_h_batch = tf.cast(gtboxes_h_batch, tf.float32)

        img_shape = tf.shape(input_img_batch)

        # 1. build base network
        feature_to_cropped = self.build_base_network(input_img_batch)

        # 2. build rpn
        with tf.variable_scope('build_rpn',
                               regularizer=slim.l2_regularizer(
                                   cfgs.WEIGHT_DECAY)):

            rpn_conv3x3 = slim.conv2d(feature_to_cropped,
                                      512, [3, 3],
                                      trainable=self.is_training,
                                      weights_initializer=cfgs.INITIALIZER,
                                      activation_fn=tf.nn.relu,
                                      scope='rpn_conv/3x3')
            rpn_cls_score = slim.conv2d(rpn_conv3x3,
                                        self.num_anchors_per_location * 2,
                                        [1, 1],
                                        stride=1,
                                        trainable=self.is_training,
                                        weights_initializer=cfgs.INITIALIZER,
                                        activation_fn=None,
                                        scope='rpn_cls_score')
            rpn_box_pred = slim.conv2d(
                rpn_conv3x3,
                self.num_anchors_per_location * 4, [1, 1],
                stride=1,
                trainable=self.is_training,
                weights_initializer=cfgs.BBOX_INITIALIZER,
                activation_fn=None,
                scope='rpn_bbox_pred')
            rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
            rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
            rpn_cls_prob = slim.softmax(rpn_cls_score, scope='rpn_cls_prob')

        # 3. generate_anchors
        featuremap_height, featuremap_width = tf.shape(
            feature_to_cropped)[1], tf.shape(feature_to_cropped)[2]
        featuremap_height = tf.cast(featuremap_height, tf.float32)
        featuremap_width = tf.cast(featuremap_width, tf.float32)

        anchors = anchor_utils.make_anchors(
            base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[0],
            anchor_scales=cfgs.ANCHOR_SCALES,
            anchor_ratios=cfgs.ANCHOR_RATIOS,
            featuremap_height=featuremap_height,
            featuremap_width=featuremap_width,
            stride=cfgs.ANCHOR_STRIDE,
            name="make_anchors_forRPN")

        # with tf.variable_scope('make_anchors'):
        #     anchors = anchor_utils.make_anchors(height=featuremap_height,
        #                                         width=featuremap_width,
        #                                         feat_stride=cfgs.ANCHOR_STRIDE[0],
        #                                         anchor_scales=cfgs.ANCHOR_SCALES,
        #                                         anchor_ratios=cfgs.ANCHOR_RATIOS, base_size=16
        #                                         )

        # 4. postprocess rpn proposals. such as: decode, clip, NMS
        with tf.variable_scope('postprocess_RPN'):
            # rpn_cls_prob = tf.reshape(rpn_cls_score, [-1, 2])
            # rpn_cls_prob = slim.softmax(rpn_cls_prob, scope='rpn_cls_prob')
            # rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
            rois, roi_scores = postprocess_rpn_proposals(
                rpn_bbox_pred=rpn_box_pred,
                rpn_cls_prob=rpn_cls_prob,
                img_shape=img_shape,
                anchors=anchors,
                is_training=self.is_training)
            # rois shape [-1, 4]
            # +++++++++++++++++++++++++++++++++++++add img smry+++++++++++++++++++++++++++++++++++++++++++++++++++++++

            if self.is_training:
                rois_in_img = show_box_in_tensor.draw_boxes_with_categories(
                    img_batch=input_img_batch, boxes=rois, scores=roi_scores)
                tf.summary.image('all_rpn_rois', rois_in_img)

                score_gre_05 = tf.reshape(
                    tf.where(tf.greater_equal(roi_scores, 0.5)), [-1])
                score_gre_05_rois = tf.gather(rois, score_gre_05)
                score_gre_05_score = tf.gather(roi_scores, score_gre_05)
                score_gre_05_in_img = show_box_in_tensor.draw_boxes_with_categories(
                    img_batch=input_img_batch,
                    boxes=score_gre_05_rois,
                    scores=score_gre_05_score)
                tf.summary.image('score_greater_05_rois', score_gre_05_in_img)
            # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

        if self.is_training:
            with tf.variable_scope('sample_anchors_minibatch'):
                rpn_labels, rpn_bbox_targets = \
                    tf.py_func(
                        anchor_target_layer,
                        [gtboxes_h_batch, img_shape, anchors],
                        [tf.float32, tf.float32])
                rpn_bbox_targets = tf.reshape(rpn_bbox_targets, [-1, 4])
                rpn_labels = tf.to_int32(rpn_labels, name="to_int32")
                rpn_labels = tf.reshape(rpn_labels, [-1])
                self.add_anchor_img_smry(input_img_batch, anchors, rpn_labels)

            # --------------------------------------add smry-----------------------------------------------------------

            rpn_cls_category = tf.argmax(rpn_cls_prob, axis=1)
            kept_rpn = tf.reshape(tf.where(tf.not_equal(rpn_labels, -1)),
                                  [-1])
            rpn_cls_category = tf.gather(rpn_cls_category, kept_rpn)
            acc = tf.reduce_mean(
                tf.to_float(
                    tf.equal(rpn_cls_category,
                             tf.to_int64(tf.gather(rpn_labels, kept_rpn)))))
            tf.summary.scalar('ACC/rpn_accuracy', acc)

            with tf.control_dependencies([rpn_labels]):
                with tf.variable_scope('sample_RCNN_minibatch'):
                    rois, labels, bbox_targets_h, bbox_targets_r = \
                    tf.py_func(proposal_target_layer,
                               [rois, gtboxes_h_batch, gtboxes_r_batch],
                               [tf.float32, tf.float32, tf.float32, tf.float32])

                    rois = tf.reshape(rois, [-1, 4])
                    labels = tf.to_int32(labels)
                    labels = tf.reshape(labels, [-1])
                    bbox_targets_h = tf.reshape(bbox_targets_h,
                                                [-1, 4 * (cfgs.CLASS_NUM + 1)])
                    bbox_targets_r = tf.reshape(bbox_targets_r,
                                                [-1, 5 * (cfgs.CLASS_NUM + 1)])
                    self.add_roi_batch_img_smry(input_img_batch, rois, labels)

        # -------------------------------------------------------------------------------------------------------------#
        #                                            Fast-RCNN                                                         #
        # -------------------------------------------------------------------------------------------------------------#

        # 5. build Fast-RCNN
        # rois = tf.Print(rois, [tf.shape(rois)], 'rois shape', summarize=10)
        bbox_pred_h, cls_score_h, bbox_pred_r, cls_score_r = self.build_fastrcnn(
            feature_to_cropped=feature_to_cropped,
            rois=rois,
            img_shape=img_shape)
        # bbox_pred shape: [-1, 4*(cls_num+1)].
        # cls_score shape: [-1, cls_num+1]

        cls_prob_h = slim.softmax(cls_score_h, 'cls_prob_h')
        cls_prob_r = slim.softmax(cls_score_r, 'cls_prob_r')

        # ----------------------------------------------add smry-------------------------------------------------------
        if self.is_training:
            cls_category_h = tf.argmax(cls_prob_h, axis=1)
            fast_acc_h = tf.reduce_mean(
                tf.to_float(tf.equal(cls_category_h, tf.to_int64(labels))))
            tf.summary.scalar('ACC/fast_acc_h', fast_acc_h)

            cls_category_r = tf.argmax(cls_prob_r, axis=1)
            fast_acc_r = tf.reduce_mean(
                tf.to_float(tf.equal(cls_category_r, tf.to_int64(labels))))
            tf.summary.scalar('ACC/fast_acc_r', fast_acc_r)

        #  6. postprocess_fastrcnn
        if not self.is_training:
            final_boxes_h, final_scores_h, final_category_h = self.postprocess_fastrcnn_h(
                rois=rois,
                bbox_ppred=bbox_pred_h,
                scores=cls_prob_h,
                img_shape=img_shape)
            final_boxes_r, final_scores_r, final_category_r = self.postprocess_fastrcnn_r(
                rois=rois,
                bbox_ppred=bbox_pred_r,
                scores=cls_prob_r,
                img_shape=img_shape)
            return final_boxes_h, final_scores_h, final_category_h, final_boxes_r, final_scores_r, final_category_r
        else:
            '''
            when training, we need to build the loss
            '''
            loss_dict = self.build_loss(rpn_box_pred=rpn_box_pred,
                                        rpn_bbox_targets=rpn_bbox_targets,
                                        rpn_cls_score=rpn_cls_score,
                                        rpn_labels=rpn_labels,
                                        bbox_pred_h=bbox_pred_h,
                                        bbox_targets_h=bbox_targets_h,
                                        cls_score_h=cls_score_h,
                                        bbox_pred_r=bbox_pred_r,
                                        bbox_targets_r=bbox_targets_r,
                                        cls_score_r=cls_score_r,
                                        labels=labels)

            final_boxes_h, final_scores_h, final_category_h = self.postprocess_fastrcnn_h(
                rois=rois,
                bbox_ppred=bbox_pred_h,
                scores=cls_prob_h,
                img_shape=img_shape)
            final_boxes_r, final_scores_r, final_category_r = self.postprocess_fastrcnn_r(
                rois=rois,
                bbox_ppred=bbox_pred_r,
                scores=cls_prob_r,
                img_shape=img_shape)

            return final_boxes_h, final_scores_h, final_category_h, \
                   final_boxes_r, final_scores_r, final_category_r, loss_dict
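
(Note: postprocess_fastrcnn_h / postprocess_fastrcnn_r are not shown in this
snippet. As a hedged sketch of the usual recipe, the horizontal variant boils
down to a per-class score filter plus NMS over decoded [y1, x1, y2, x2] boxes;
the function below is an illustration under that assumption, not the
repository's implementation.)

import tensorflow as tf

def per_class_nms_sketch(boxes, scores, num_classes,
                         score_thresh=0.5, iou_thresh=0.3, max_per_class=100):
    # boxes: [N, 4] decoded horizontal boxes; scores: [N, num_classes + 1]
    boxes_out, scores_out, categories_out = [], [], []
    for cls in range(1, num_classes + 1):  # class 0 is background
        cls_scores = scores[:, cls]
        keep = tf.reshape(
            tf.where(tf.greater_equal(cls_scores, score_thresh)), [-1])
        cls_boxes = tf.gather(boxes, keep)
        cls_scores = tf.gather(cls_scores, keep)
        nms_keep = tf.image.non_max_suppression(
            cls_boxes, cls_scores,
            max_output_size=max_per_class, iou_threshold=iou_thresh)
        boxes_out.append(tf.gather(cls_boxes, nms_keep))
        scores_out.append(tf.gather(cls_scores, nms_keep))
        categories_out.append(tf.fill([tf.shape(nms_keep)[0]], cls))
    return (tf.concat(boxes_out, axis=0),
            tf.concat(scores_out, axis=0),
            tf.concat(categories_out, axis=0))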
Example #8
if __name__ == "__main__":

    print('number of samples: {0}'.format(get_num_samples(tfrecord_dir)))
    # create local and global variables initializer group
    # image, filename, gtboxes_and_label, num_objects = reader_tfrecord(record_file=tfrecord_dir,
    #                                                                   shortside_len=IMG_SHORT_SIDE_LEN,
    #                                                                   is_training=True)
    filename_batch, image_batch, gtboxes_and_label_batch, num_objects_batch = dataset_tfrecord(
        record_file=tfrecord_dir,
        shortside_len=IMG_SHORT_SIDE_LEN,
        length_limitation=IMG_MAX_LENGTH,
        is_training=True)
    gtboxes_and_label_tensor = tf.reshape(gtboxes_and_label_batch, [-1, 5])

    gtboxes_in_img = show_box_in_tensor.draw_boxes_with_categories(
        img_batch=image_batch,
        boxes=gtboxes_and_label_tensor[:, :-1],
        labels=gtboxes_and_label_tensor[:, -1])
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    with tf.Session() as sess:
        sess.run(init_op)
        # create a Coordinator to manage the lifetime of multiple threads
        coord = tf.train.Coordinator()
        # start all queue runners collected in the graph to run the input
        # queues; this involves two steps: filenames into the filename queue
        # and samples into the sample queue
        threads = tf.train.start_queue_runners(coord=coord)
        try:
            if not coord.should_stop():
                filename, image, gtboxes_and_label, gtbox_img = sess.run([
                    filename_batch, image_batch, gtboxes_and_label_batch,
                    gtboxes_in_img
                ])
        finally:
            # shut down the queue-runner threads
            coord.request_stop()
            coord.join(threads)
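
(get_num_samples is called above but not defined in the snippet; a plausible
TF1 implementation, assuming tfrecord_dir names a single record file, is just
a one-pass counter. This is an assumption, not the original code.)

import tensorflow as tf

def get_num_samples(record_file):
    # iterate the TFRecord once and count the serialized examples
    return sum(1 for _ in tf.python_io.tf_record_iterator(record_file))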
Example #9
def train():
  with tf.Graph().as_default():
    with tf.name_scope('get_batch'):
      img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch = \
          next_batch(dataset_name=cfgs.DATASET_NAME,
                     batch_size=cfgs.BATCH_SIZE,
                     shortside_len=cfgs.SHORT_SIDE_LEN,
                     is_training=True)

    with tf.name_scope('draw_gtboxes'):
      gtboxes_in_img = draw_box_with_color(img_batch, tf.reshape(gtboxes_and_label_batch, [-1, 5])[:, :-1],
                                           text=tf.shape(gtboxes_and_label_batch)[1])

    # ***********************************************************************************************
    # *                                         share net                                           *
    # ***********************************************************************************************
    _, share_net = get_network_byname(net_name=cfgs.NET_NAME,
                                      inputs=img_batch,
                                      num_classes=None,
                                      is_training=True,
                                      output_stride=None,
                                      global_pool=False,
                                      spatial_squeeze=False)

    # ***********************************************************************************************
    # *                                            rpn                                              *
    # ***********************************************************************************************
    rpn = build_rpn.RPN(net_name=cfgs.NET_NAME,
                        inputs=img_batch,
                        gtboxes_and_label=tf.squeeze(gtboxes_and_label_batch, 0),
                        is_training=True,
                        share_head=cfgs.SHARE_HEAD,
                        share_net=share_net,
                        stride=cfgs.STRIDE,
                        anchor_ratios=cfgs.ANCHOR_RATIOS,
                        anchor_scales=cfgs.ANCHOR_SCALES,
                        scale_factors=cfgs.SCALE_FACTORS,
                        base_anchor_size_list=cfgs.BASE_ANCHOR_SIZE_LIST,  # P2, P3, P4, P5, P6
                        level=cfgs.LEVEL,
                        top_k_nms=cfgs.RPN_TOP_K_NMS,
                        rpn_nms_iou_threshold=cfgs.RPN_NMS_IOU_THRESHOLD,
                        max_proposals_num=cfgs.MAX_PROPOSAL_NUM,
                        rpn_iou_positive_threshold=cfgs.RPN_IOU_POSITIVE_THRESHOLD,
                        # iou>=0.7 is positive box, iou< 0.3 is negative
                        rpn_iou_negative_threshold=cfgs.RPN_IOU_NEGATIVE_THRESHOLD,
                        rpn_mini_batch_size=cfgs.RPN_MINIBATCH_SIZE,
                        rpn_positives_ratio=cfgs.RPN_POSITIVE_RATE,
                        remove_outside_anchors=False,  # whether remove anchors outside
                        rpn_weight_decay=cfgs.WEIGHT_DECAY[cfgs.NET_NAME])

    rpn_proposals_boxes, rpn_proposals_scores = rpn.rpn_proposals()  # rpn_score shape: [300, ]

    rpn_location_loss, rpn_classification_loss = rpn.rpn_losses()
    rpn_total_loss = rpn_classification_loss + rpn_location_loss

    with tf.name_scope('draw_proposals'):
      # score > 0.5 is object
      rpn_object_boxes_indices = tf.reshape(tf.where(tf.greater(rpn_proposals_scores, 0.5)), [-1])
      rpn_object_boxes = tf.gather(rpn_proposals_boxes, rpn_object_boxes_indices)

      rpn_proposals_object_boxes_in_img = draw_box_with_color(img_batch, rpn_object_boxes,
                                                              text=tf.shape(rpn_object_boxes)[0])
      rpn_proposals_boxes_in_img = draw_box_with_color(img_batch, rpn_proposals_boxes,
                                                       text=tf.shape(rpn_proposals_boxes)[0])
    # ***********************************************************************************************
    # *                                         Fast RCNN                                           *
    # ***********************************************************************************************

    fast_rcnn = build_fast_rcnn.FastRCNN(img_batch=img_batch,
                                         feature_pyramid=rpn.feature_pyramid,
                                         rpn_proposals_boxes=rpn_proposals_boxes,
                                         rpn_proposals_scores=rpn_proposals_scores,
                                         img_shape=tf.shape(img_batch),
                                         roi_size=cfgs.ROI_SIZE,
                                         roi_pool_kernel_size=cfgs.ROI_POOL_KERNEL_SIZE,
                                         scale_factors=cfgs.SCALE_FACTORS,
                                         gtboxes_and_label=tf.squeeze(gtboxes_and_label_batch, 0),
                                         fast_rcnn_nms_iou_threshold=cfgs.FAST_RCNN_NMS_IOU_THRESHOLD,
                                         fast_rcnn_maximum_boxes_per_img=100,
                                         fast_rcnn_nms_max_boxes_per_class=cfgs.FAST_RCNN_NMS_MAX_BOXES_PER_CLASS,
                                         show_detections_score_threshold=cfgs.FINAL_SCORE_THRESHOLD,  # show detections which score >= 0.6
                                         num_classes=cfgs.CLASS_NUM,
                                         fast_rcnn_minibatch_size=cfgs.FAST_RCNN_MINIBATCH_SIZE,
                                         fast_rcnn_positives_ratio=cfgs.FAST_RCNN_POSITIVE_RATE,
                                         # iou>0.5 is positive, iou<0.5 is negative
                                         fast_rcnn_positives_iou_threshold=cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD,
                                         use_dropout=False,
                                         weight_decay=cfgs.WEIGHT_DECAY[cfgs.NET_NAME],
                                         is_training=True,
                                         level=cfgs.LEVEL)

    fast_rcnn_decode_boxes, fast_rcnn_score, num_of_objects, detection_category = \
        fast_rcnn.fast_rcnn_predict()
    fast_rcnn_location_loss, fast_rcnn_classification_loss = fast_rcnn.fast_rcnn_loss()
    fast_rcnn_total_loss = fast_rcnn_location_loss + fast_rcnn_classification_loss

    with tf.name_scope('draw_boxes_with_categories'):
      fast_rcnn_predict_boxes_in_imgs = draw_boxes_with_categories(img_batch=img_batch,
                                                                   boxes=fast_rcnn_decode_boxes,
                                                                   labels=detection_category,
                                                                   scores=fast_rcnn_score)

    # train
    added_loss = rpn_total_loss + fast_rcnn_total_loss
    total_loss = tf.losses.get_total_loss()

    global_step = tf.train.get_or_create_global_step()

    lr = tf.train.piecewise_constant(global_step,
                                     boundaries=[np.int64(20000), np.int64(40000)],
                                     values=[cfgs.LR, cfgs.LR / 10, cfgs.LR / 100])
    tf.summary.scalar('lr', lr)
    optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)

    train_op = slim.learning.create_train_op(total_loss, optimizer, global_step)  # rpn_total_loss,
    # train_op = optimizer.minimize(second_classification_loss, global_step)

    # ***********************************************************************************************
    # *                                          Summary                                            *
    # ***********************************************************************************************
    # ground truth and predict
    tf.summary.image('img/gtboxes', gtboxes_in_img)
    tf.summary.image('img/faster_rcnn_predict', fast_rcnn_predict_boxes_in_imgs)
    # rpn loss and image
    tf.summary.scalar('rpn/rpn_location_loss', rpn_location_loss)
    tf.summary.scalar('rpn/rpn_classification_loss', rpn_classification_loss)
    tf.summary.scalar('rpn/rpn_total_loss', rpn_total_loss)

    tf.summary.scalar('fast_rcnn/fast_rcnn_location_loss', fast_rcnn_location_loss)
    tf.summary.scalar('fast_rcnn/fast_rcnn_classification_loss', fast_rcnn_classification_loss)
    tf.summary.scalar('fast_rcnn/fast_rcnn_total_loss', fast_rcnn_total_loss)

    tf.summary.scalar('loss/added_loss', added_loss)
    tf.summary.scalar('loss/total_loss', total_loss)

    tf.summary.image('rpn/rpn_all_boxes', rpn_proposals_boxes_in_img)
    tf.summary.image('rpn/rpn_object_boxes', rpn_proposals_object_boxes_in_img)
    # learning_rate
    tf.summary.scalar('learning_rate', lr)

    summary_op = tf.summary.merge_all()
    init_op = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    )

    restorer, restore_ckpt = restore_model.get_restorer(test=False)
    saver = tf.train.Saver(max_to_keep=10)

    config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.5
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
      sess.run(init_op)
      if restorer is not None:
        restorer.restore(sess, restore_ckpt)
        print('restore model')
      coord = tf.train.Coordinator()
      threads = tf.train.start_queue_runners(sess, coord)

      summary_path = os.path.join(cfgs.ROOT_PATH + 'output/{}'.format(cfgs.DATASET_NAME),
                                  FLAGS.summary_path, cfgs.VERSION)
      mkdir(summary_path)
      summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)

      for step in range(cfgs.MAX_ITERATION):
        training_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
        start = time.time()

        _global_step, _img_name_batch, _rpn_location_loss, _rpn_classification_loss, \
            _rpn_total_loss, _fast_rcnn_location_loss, _fast_rcnn_classification_loss, \
            _fast_rcnn_total_loss, _added_loss, _total_loss, _ = \
            sess.run([global_step, img_name_batch, rpn_location_loss, rpn_classification_loss,
                      rpn_total_loss, fast_rcnn_location_loss, fast_rcnn_classification_loss,
                      fast_rcnn_total_loss, added_loss, total_loss, train_op])

        end = time.time()

        if step % 50 == 0:
          print("""{}: step{} image_name:{}
                     rpn_loc_loss:{:.4f} | rpn_cla_loss:{:.4f} | rpn_total_loss:{:.4f}
                     fast_rcnn_loc_loss:{:.4f} | fast_rcnn_cla_loss:{:.4f} | fast_rcnn_total_loss:{:.4f}
                     added_loss:{:.4f} | total_loss:{:.4f} | pre_cost_time:{:.4f}s"""
                .format(training_time, _global_step, str(_img_name_batch[0]), _rpn_location_loss,
                        _rpn_classification_loss, _rpn_total_loss, _fast_rcnn_location_loss,
                        _fast_rcnn_classification_loss, _fast_rcnn_total_loss, _added_loss, _total_loss,
                        (end - start)))

        if step % 500 == 0:
          summary_str = sess.run(summary_op)
          summary_writer.add_summary(summary_str, _global_step)
          summary_writer.flush()

        if (step > 15000 and step % 1000 == 0) or (step == cfgs.MAX_ITERATION - 1):
          save_dir = os.path.join(cfgs.ROOT_PATH + 'output/{}'.format(cfgs.DATASET_NAME),
                                  FLAGS.trained_checkpoint, cfgs.VERSION)
          mkdir(save_dir)

          save_ckpt = os.path.join(save_dir, '{}_'.format(
              cfgs.DATASET_NAME)+str(_global_step)+'model.ckpt')
          saver.save(sess, save_ckpt)
          print('Weights have been saved to {}.'.format(save_ckpt))

      print('Training done.')

      coord.request_stop()
      coord.join(threads)
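
(On the two losses above: tf.losses.get_total_loss() returns everything
registered in the tf.GraphKeys.LOSSES collection plus the regularization
losses, so total_loss exceeds added_loss by roughly the weight-decay term.
A minimal self-contained illustration, not taken from this repository:)

import tensorflow as tf

w = tf.get_variable('w', shape=[2, 2],
                    regularizer=tf.contrib.layers.l2_regularizer(1e-4))
task_loss = tf.reduce_sum(tf.square(w))
tf.losses.add_loss(task_loss)

reg_term = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
total = tf.losses.get_total_loss()  # equals task_loss + reg_term here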
Example #10
    def build_head_train_sample(self):
        """
        when training, we should know each reference box's label and gtbox,
        in second stage
        iou >= 0.5 is object
        iou < 0.5 is background
        this function need batch_slice
        :return:
        minibatch_reference_proboxes: (batch_szie, config.HEAD_MINIBATCH_SIZE, 4)[y1, x1, y2, x2]
        minibatch_encode_gtboxes:(batch_szie, config.HEAD_MINIBATCH_SIZE, 4)[dy, dx, log(dh), log(dw)]
        object_mask:(batch_szie, config.HEAD_MINIBATCH_SIZE) 1 indicate is object, 0 indicate is not objects
        label: (batch_szie, config.HEAD_MINIBATCH_SIZE)  # config.HEAD_MINIBATCH_SIZE 表示 classes_id
        """

        with tf.name_scope('build_head_train_sample'):

            def batch_slice_build_sample(gtboxes_and_label,
                                         rpn_proposals_boxes):

                with tf.name_scope('select_pos_neg_samples'):
                    gtboxes = tf.cast(
                        tf.reshape(gtboxes_and_label[:, :-1], [-1, 4]),
                        tf.float32)
                    gt_class_ids = tf.cast(
                        tf.reshape(gtboxes_and_label[:, -1], [
                            -1,
                        ]), tf.int32)
                    gtboxes, non_zeros = boxes_utils.trim_zeros_graph(
                        gtboxes, name="trim_gt_box")  # [M, 4]
                    gt_class_ids = tf.boolean_mask(gt_class_ids, non_zeros)
                    rpn_proposals_boxes, _ = boxes_utils.trim_zeros_graph(
                        rpn_proposals_boxes, name="trim_rpn_proposal_train")

                    ious = iou.iou_calculate(rpn_proposals_boxes,
                                             gtboxes)  # [N, M]
                    matches = tf.cast(tf.argmax(ious, axis=1),
                                      tf.int32)  # [N, ]
                    max_iou_each_row = tf.reduce_max(ious, axis=1)
                    positives = tf.cast(
                        tf.greater_equal(
                            max_iou_each_row,
                            cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD), tf.int32)

                    reference_boxes_matched_gtboxes = tf.gather(
                        gtboxes, matches)  # [N, 4]
                    gt_class_ids = tf.gather(gt_class_ids, matches)  # [N, ]
                    object_mask = tf.cast(positives, tf.float32)  # [N, ]
                    # background boxes get weight 0 so that they contribute
                    # no gradient
                    gt_class_ids = gt_class_ids * positives

                with tf.name_scope('head_train_minibatch'):
                    # choose the positive indices
                    positive_indices = tf.reshape(
                        tf.where(tf.equal(object_mask, 1.)), [-1])
                    num_of_positives = tf.minimum(
                        tf.shape(positive_indices)[0],
                        tf.cast(
                            cfgs.FAST_RCNN_MINIBATCH_SIZE *
                            cfgs.FAST_RCNN_POSITIVE_RATE, tf.int32))
                    positive_indices = tf.random_shuffle(positive_indices)
                    positive_indices = tf.slice(positive_indices,
                                                begin=[0],
                                                size=[num_of_positives])
                    # choose the negative indices,
                    # keeping the positive-to-negative proportion at 1:3
                    negative_indices = tf.reshape(
                        tf.where(tf.equal(object_mask, 0.)), [-1])
                    num_of_negatives = tf.cast(int(1. / cfgs.FAST_RCNN_POSITIVE_RATE) * num_of_positives, tf.int32)\
                                       - num_of_positives

                    num_of_negatives = tf.minimum(
                        tf.shape(negative_indices)[0], num_of_negatives)
                    negative_indices = tf.random_shuffle(negative_indices)
                    negative_indices = tf.slice(negative_indices,
                                                begin=[0],
                                                size=[num_of_negatives])

                    minibatch_indices = tf.concat(
                        [positive_indices, negative_indices], axis=0)
                    minibatch_reference_gtboxes = tf.gather(
                        reference_boxes_matched_gtboxes, minibatch_indices)
                    minibatch_reference_proboxes = tf.gather(
                        rpn_proposals_boxes, minibatch_indices)
                    # encode gtboxes
                    minibatch_encode_gtboxes = \
                        encode_and_decode.encode_boxes(
                            unencode_boxes=minibatch_reference_gtboxes,
                            reference_boxes=minibatch_reference_proboxes,
                            scale_factors=cfgs.BBOX_STD_DEV)
                    object_mask = tf.gather(object_mask, minibatch_indices)
                    gt_class_ids = tf.gather(gt_class_ids, minibatch_indices)

                    # padding if necessary
                    gap = tf.cast(cfgs.FAST_RCNN_MINIBATCH_SIZE -
                                  (num_of_positives + num_of_negatives),
                                  dtype=tf.int32)
                    bbox_padding = tf.zeros((gap, 4))
                    minibatch_reference_proboxes = tf.concat(
                        [minibatch_reference_proboxes, bbox_padding], axis=0)
                    minibatch_encode_gtboxes = tf.concat(
                        [minibatch_encode_gtboxes, bbox_padding], axis=0)
                    object_mask = tf.pad(object_mask, [(0, gap)])
                    gt_class_ids = tf.pad(gt_class_ids, [(0, gap)])

                return minibatch_reference_proboxes, minibatch_encode_gtboxes, object_mask, gt_class_ids

            minibatch_reference_proboxes, minibatch_encode_gtboxes, object_mask, gt_class_ids = \
                    boxes_utils.batch_slice([self.gtboxes_and_label, self.rpn_proposals_boxes],
                                            lambda x, y: batch_slice_build_sample(x, y),
                                            cfgs.BATCH_SIZE)
        if cfgs.DEBUG:
            gt_vision = draw_boxes_with_categories(
                self.origin_image[0], self.gtboxes_and_label[0, :, :4],
                self.gtboxes_and_label[0, :, 4], cfgs.LABEL_TO_NAME)
            tf.summary.image("gt_vision", gt_vision)

            draw_bbox_train = draw_boxes_with_categories(
                self.origin_image[0], minibatch_reference_proboxes[0],
                gt_class_ids[0], cfgs.LABEL_TO_NAME)
            tf.summary.image("train_proposal", draw_bbox_train)

        return minibatch_reference_proboxes, minibatch_encode_gtboxes, object_mask, gt_class_ids
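
(boxes_utils.trim_zeros_graph, used above to strip the zero-padded gt rows,
is not shown; a minimal sketch mirroring the widely copied Mask R-CNN helper,
offered as an assumption about its behavior:)

import tensorflow as tf

def trim_zeros_graph(boxes, name='trim_zeros'):
    # keep rows whose absolute coordinate sum is non-zero, and return the mask
    non_zeros = tf.cast(tf.reduce_sum(tf.abs(boxes), axis=1), tf.bool)
    boxes = tf.boolean_mask(boxes, non_zeros, name=name)
    return boxes, non_zeros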
Example #11
    def build_whole_detection_network(self, input_img_batch, gtboxes_r_batch,
                                      gtboxes_h_batch):

        if self.is_training:
            # ensure shapes are [M, 6] (rotated) and [M, 5] (horizontal)
            gtboxes_r_batch = tf.reshape(gtboxes_r_batch, [-1, 6])
            gtboxes_h_batch = tf.reshape(gtboxes_h_batch, [-1, 5])
            gtboxes_r_batch = tf.cast(gtboxes_r_batch, tf.float32)
            gtboxes_h_batch = tf.cast(gtboxes_h_batch, tf.float32)

        img_shape = tf.shape(input_img_batch)

        # 1. build base network
        C2_, C4 = self.build_base_network(input_img_batch)

        C2 = slim.conv2d(C2_,
                         num_outputs=1024,
                         kernel_size=[1, 1],
                         stride=1,
                         scope='build_C2_to_1024')

        self.feature_pyramid = {'C2': C2, 'C4': C4}

        # 2. build rpn

        rpn_all_encode_boxes = {}
        rpn_all_boxes_scores = {}
        rpn_all_cls_score = {}
        anchors = {}

        with tf.variable_scope('build_rpn',
                               regularizer=slim.l2_regularizer(
                                   cfgs.WEIGHT_DECAY)):
            i = 0
            for level in self.level:
                rpn_conv3x3 = slim.conv2d(
                    self.feature_pyramid[level],
                    512, [3, 3],
                    trainable=self.is_training,
                    weights_initializer=cfgs.INITIALIZER,
                    activation_fn=tf.nn.relu,
                    scope='rpn_conv/3x3_{}'.format(level))
                rpn_cls_score = slim.conv2d(
                    rpn_conv3x3,
                    self.num_anchors_per_location[i] * 2, [1, 1],
                    stride=1,
                    trainable=self.is_training,
                    weights_initializer=cfgs.INITIALIZER,
                    activation_fn=None,
                    scope='rpn_cls_score_{}'.format(level))
                rpn_box_pred = slim.conv2d(
                    rpn_conv3x3,
                    self.num_anchors_per_location[i] * 4, [1, 1],
                    stride=1,
                    trainable=self.is_training,
                    weights_initializer=cfgs.BBOX_INITIALIZER,
                    activation_fn=None,
                    scope='rpn_bbox_pred_{}'.format(level))
                rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
                rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
                rpn_cls_prob = slim.softmax(
                    rpn_cls_score,
                    scope='rpn_cls_prob_{}'.format(level))  # do the softmax

                rpn_all_cls_score[level] = rpn_cls_score
                rpn_all_boxes_scores[level] = rpn_cls_prob
                rpn_all_encode_boxes[level] = rpn_box_pred
                i += 1

        # 3. generate_anchors
        i = 0
        for level, base_anchor_size, stride in zip(self.level,
                                                   self.base_anchor_size_list,
                                                   self.stride):
            featuremap_height, featuremap_width = tf.shape(
                self.feature_pyramid[level])[1], tf.shape(
                    self.feature_pyramid[level])[2]

            featuremap_height = tf.cast(featuremap_height, tf.float32)
            featuremap_width = tf.cast(featuremap_width, tf.float32)

            # anchor_scale = tf.constant(self.anchor_scales[i], dtype=tf.float32)
            # anchor_ratio = tf.constant(self.anchor_ratios[i], dtype=tf.float32)
            anchor_scale = self.anchor_scales[i]
            anchor_ratio = self.anchor_ratios[i]

            tmp_anchors = anchor_utils.make_anchors(
                base_anchor_size=base_anchor_size,
                anchor_scales=anchor_scale,
                anchor_ratios=anchor_ratio,
                featuremap_height=featuremap_height,
                featuremap_width=featuremap_width,
                stride=stride,
                name="make_anchors_forRPN_{}".format(level))
            tmp_anchors = tf.reshape(tmp_anchors, [-1, 4])
            anchors[level] = tmp_anchors
            i += 1

        # with tf.variable_scope('make_anchors'):
        #     anchors = anchor_utils.make_anchors(height=featuremap_height,
        #                                         width=featuremap_width,
        #                                         feat_stride=cfgs.ANCHOR_STRIDE[0],
        #                                         anchor_scales=cfgs.ANCHOR_SCALES,
        #                                         anchor_ratios=cfgs.ANCHOR_RATIOS, base_size=16
        #                                         )

        # 4. postprocess rpn proposals. such as: decode, clip, NMS
        rois = {}
        roi_scores = {}
        with tf.variable_scope('postprocess_RPN'):
            # rpn_cls_prob = tf.reshape(rpn_cls_score, [-1, 2])
            # rpn_cls_prob = slim.softmax(rpn_cls_prob, scope='rpn_cls_prob')
            # rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
            for level in self.level:
                rois_rpn, roi_scores_rpn = postprocess_rpn_proposals(
                    rpn_bbox_pred=rpn_all_encode_boxes[level],
                    rpn_cls_prob=rpn_all_boxes_scores[level],
                    img_shape=img_shape,
                    anchors=anchors[level],
                    is_training=self.is_training)
                # rois[level] = rois
                # roi_scores[level] = roi_scores
                # rois shape [-1, 4]
                # +++++++++++++++++++++++++++++++++++++add img smry+++++++++++++++++++++++++++++++++++++++++++++++++++++++
                rois[level] = rois_rpn
                roi_scores[level] = roi_scores_rpn

                if self.is_training:
                    rois_in_img = show_box_in_tensor.draw_boxes_with_categories(
                        img_batch=input_img_batch,
                        boxes=rois_rpn,
                        scores=roi_scores_rpn)
                    tf.summary.image('all_rpn_rois_{}'.format(level),
                                     rois_in_img)

                    score_gre_05 = tf.reshape(
                        tf.where(tf.greater_equal(roi_scores_rpn, 0.5)), [-1])
                    score_gre_05_rois = tf.gather(rois_rpn, score_gre_05)
                    score_gre_05_score = tf.gather(roi_scores_rpn,
                                                   score_gre_05)
                    score_gre_05_in_img = show_box_in_tensor.draw_boxes_with_categories(
                        img_batch=input_img_batch,
                        boxes=score_gre_05_rois,
                        scores=score_gre_05_score)
                    tf.summary.image('score_greater_05_rois_{}'.format(level),
                                     score_gre_05_in_img)
            # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

        rpn_labels = {}
        rpn_bbox_targets = {}
        labels_all = []
        labels = {}
        bbox_targets_h = {}
        bbox_targets_r = {}
        bbox_targets_all_h = []
        bbox_targets_all_r = []

        if self.is_training:
            for level in self.level:
                with tf.variable_scope(
                        'sample_anchors_minibatch_{}'.format(level)):
                    rpn_labels_one, rpn_bbox_targets_one = \
                        tf.py_func(
                            anchor_target_layer,
                            [gtboxes_h_batch, img_shape, anchors[level]],
                            [tf.float32, tf.float32])
                    rpn_bbox_targets_one = tf.reshape(rpn_bbox_targets_one,
                                                      [-1, 4])
                    rpn_labels_one = tf.to_int32(
                        rpn_labels_one, name="to_int32_{}".format(level))
                    rpn_labels_one = tf.reshape(rpn_labels_one, [-1])
                    self.add_anchor_img_smry(input_img_batch, anchors[level],
                                             rpn_labels_one)

                    # -----------------------------add to the dict-------------------------------------------------------------
                    rpn_labels[level] = rpn_labels_one
                    rpn_bbox_targets[level] = rpn_bbox_targets_one
                # --------------------------------------add smry-----------------------------------------------------------

                rpn_cls_category = tf.argmax(rpn_all_boxes_scores[level],
                                             axis=1)
                kept_rpn = tf.reshape(
                    tf.where(tf.not_equal(rpn_labels_one, -1)), [-1])
                rpn_cls_category = tf.gather(rpn_cls_category,
                                             kept_rpn)  # predictions
                acc = tf.reduce_mean(
                    tf.to_float(
                        tf.equal(
                            rpn_cls_category,
                            tf.to_int64(tf.gather(rpn_labels_one,
                                                  kept_rpn)))))
                tf.summary.scalar('ACC/rpn_accuracy_{}'.format(level), acc)

                with tf.control_dependencies([rpn_labels[level]]):
                    with tf.variable_scope(
                            'sample_RCNN_minibatch_{}'.format(level)):
                        rois_, labels_, bbox_targets_h_, bbox_targets_r_ = \
                            tf.py_func(proposal_target_layer,
                                       [rois[level], gtboxes_h_batch, gtboxes_r_batch],
                                       [tf.float32, tf.float32, tf.float32, tf.float32])

                        rois_fast = tf.reshape(rois_, [-1, 4])
                        labels_fast = tf.to_int32(labels_)
                        labels_fast = tf.reshape(labels_fast, [-1])
                        bbox_targets_h_fast = tf.reshape(
                            bbox_targets_h_, [-1, 4 * (cfgs.CLASS_NUM + 1)])
                        bbox_targets_r_fast = tf.reshape(
                            bbox_targets_r_, [-1, 5 * (cfgs.CLASS_NUM + 1)])
                        self.add_roi_batch_img_smry(input_img_batch, rois_fast,
                                                    labels_fast)
                        # ---------------------- store per-level results ----------------------
                        rois[level] = rois_fast
                        labels[level] = labels_fast
                        bbox_targets_h[level] = bbox_targets_h_fast
                        bbox_targets_r[level] = bbox_targets_r_fast
                        labels_all.append(labels_fast)
                        bbox_targets_all_h.append(bbox_targets_h_fast)
                        bbox_targets_all_r.append(bbox_targets_r_fast)

            fast_labels = tf.concat(labels_all, axis=0)
            fast_bbox_targets_h = tf.concat(bbox_targets_all_h, axis=0)
            fast_bbox_targets_r = tf.concat(bbox_targets_all_r, axis=0)
        # -------------------------------------------------------------------------------------------------------------#
        #                                            Fast-RCNN                                                         #
        # -------------------------------------------------------------------------------------------------------------#

        # 5. build Fast-RCNN
        # rois = tf.Print(rois, [tf.shape(rois)], 'rois shape', summarize=10)

        bbox_pred_h, cls_score_h, bbox_pred_r, cls_score_r = self.build_fastrcnn(
            feature_to_cropped=self.feature_pyramid,
            rois_all=rois,
            img_shape=img_shape)

        # feature_to_cropped here holds the feature maps (the feature pyramid)
        # bbox_pred shape: [-1, 4*(cls_num+1)].
        # cls_score shape: [-1, cls_num+1]

        cls_prob_h = slim.softmax(cls_score_h,
                                  'cls_prob_h')  # the horizontal and rotated branches are processed separately
        cls_prob_r = slim.softmax(cls_score_r, 'cls_prob_r')

        # ----------------------------------------------add smry-------------------------------------------------------
        if self.is_training:

            cls_category_h = tf.argmax(cls_prob_h, axis=1)
            fast_acc_h = tf.reduce_mean(
                tf.to_float(tf.equal(cls_category_h,
                                     tf.to_int64(fast_labels))))
            tf.summary.scalar('ACC/fast_acc_h', fast_acc_h)

            cls_category_r = tf.argmax(cls_prob_r, axis=1)
            fast_acc_r = tf.reduce_mean(
                tf.to_float(tf.equal(cls_category_r,
                                     tf.to_int64(fast_labels))))
            tf.summary.scalar('ACC/fast_acc_r', fast_acc_r)

        #  6. postprocess_fastrcnn
        if not self.is_training:

            rois_all = []
            for level in self.level:
                rois_all.append(rois[level])
            rois = tf.concat(rois_all, axis=0)

            final_boxes_h, final_scores_h, final_category_h = self.postprocess_fastrcnn_h(
                rois=rois,
                bbox_ppred=bbox_pred_h,
                scores=cls_prob_h,
                img_shape=img_shape)
            final_boxes_r, final_scores_r, final_category_r = self.postprocess_fastrcnn_r(
                rois=rois,
                bbox_ppred=bbox_pred_r,
                scores=cls_prob_r,
                img_shape=img_shape)
            return final_boxes_h, final_scores_h, final_category_h, final_boxes_r, final_scores_r, final_category_r
        else:
            '''
            When training, we need to build the loss.
            '''
            loss_dict = self.build_loss(rpn_box_pred=rpn_all_encode_boxes,
                                        rpn_bbox_targets=rpn_bbox_targets,
                                        rpn_cls_score=rpn_all_cls_score,
                                        rpn_labels=rpn_labels,
                                        bbox_pred_h=bbox_pred_h,
                                        bbox_targets_h=fast_bbox_targets_h,
                                        cls_score_h=cls_score_h,
                                        bbox_pred_r=bbox_pred_r,
                                        bbox_targets_r=fast_bbox_targets_r,
                                        cls_score_r=cls_score_r,
                                        labels=fast_labels)
            rois_all = []
            for level in self.level:
                rois_all.append(rois[level])
            rois = tf.concat(rois_all, axis=0)

            final_boxes_h, final_scores_h, final_category_h = self.postprocess_fastrcnn_h(
                rois=rois,
                bbox_ppred=bbox_pred_h,
                scores=cls_prob_h,
                img_shape=img_shape)
            final_boxes_r, final_scores_r, final_category_r = self.postprocess_fastrcnn_r(
                rois=rois,
                bbox_ppred=bbox_pred_r,
                scores=cls_prob_r,
                img_shape=img_shape)

            return final_boxes_h, final_scores_h, final_category_h, \
                   final_boxes_r, final_scores_r, final_category_r, loss_dict
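
(The horizontal bbox targets above follow the standard Faster R-CNN
[dy, dx, log(dh), log(dw)] parameterization; below is a hedged sketch of
that encoder, assuming [y1, x1, y2, x2] inputs and optional scale factors
such as cfgs.BBOX_STD_DEV, not necessarily the repository's exact code.)

import tensorflow as tf

def encode_boxes_sketch(unencode_boxes, reference_boxes, scale_factors=None):
    y1, x1, y2, x2 = tf.unstack(unencode_boxes, axis=1)
    y1_r, x1_r, y2_r, x2_r = tf.unstack(reference_boxes, axis=1)

    h, w = y2 - y1, x2 - x1
    h_r, w_r = y2_r - y1_r, x2_r - x1_r
    cy, cx = y1 + 0.5 * h, x1 + 0.5 * w
    cy_r, cx_r = y1_r + 0.5 * h_r, x1_r + 0.5 * w_r

    dy, dx = (cy - cy_r) / h_r, (cx - cx_r) / w_r
    dh, dw = tf.log(h / h_r), tf.log(w / w_r)
    if scale_factors is not None:
        dy, dx, dh, dw = [t * s for t, s in
                          zip([dy, dx, dh, dw], scale_factors)]
    return tf.stack([dy, dx, dh, dw], axis=1)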
Example #12
def train():
    retinanet = build_whole_network.DetectionNetwork(
        base_network_name=cfgs.NET_NAME, is_training=True)

    with tf.name_scope('get_batch'):
        img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch = \
            next_batch(dataset_name=cfgs.DATASET_NAME,  # 'pascal', 'coco'
                       batch_size=cfgs.BATCH_SIZE,
                       shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                       is_training=True)
        gtboxes_and_label = tf.reshape(gtboxes_and_label_batch, [-1, 5])
        if cfgs.NET_NAME in ['resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d']:
            img_batch = img_batch / tf.constant([cfgs.PIXEL_STD])

    final_bbox, final_scores, final_category, loss_dict = retinanet.build_whole_detection_network(
        input_img_batch=img_batch, gtboxes_batch=gtboxes_and_label)

    # ----------------------------------------------------------------------------------------------------build loss
    weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses())
    cls_loss = loss_dict['cls_loss']
    reg_loss = loss_dict['reg_loss']
    total_loss = cls_loss + reg_loss + weight_decay_loss

    # ---------------------------------------------------------------------------------------------------add summary
    tf.summary.scalar('RETINANET_LOSS/cls_loss', cls_loss)
    tf.summary.scalar('RETINANET_LOSS/reg_loss', reg_loss)

    tf.summary.scalar('LOSS/total_loss', total_loss)
    tf.summary.scalar('LOSS/regular_weights', weight_decay_loss)

    gtboxes_in_img = show_box_in_tensor.draw_boxes_with_categories(
        img_batch=img_batch,
        boxes=gtboxes_and_label[:, :-1],
        labels=gtboxes_and_label[:, -1])
    if cfgs.ADD_BOX_IN_TENSORBOARD:
        detections_in_img = show_box_in_tensor.draw_boxes_with_categories_and_scores(
            img_batch=img_batch,
            boxes=final_bbox,
            labels=final_category,
            scores=final_scores)
        tf.summary.image('Compare/final_detection', detections_in_img)
    tf.summary.image('Compare/gtboxes', gtboxes_in_img)

    global_step = slim.get_or_create_global_step()
    lr = tf.train.piecewise_constant(
        global_step,
        boundaries=[
            np.int64(cfgs.DECAY_STEP[0]),
            np.int64(cfgs.DECAY_STEP[1])
        ],
        values=[cfgs.LR, cfgs.LR / 10., cfgs.LR / 100.])
    tf.summary.scalar('lr', lr)
    optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)

    # ---------------------------------------------------------------------------------------------compute gradients
    gradients = retinanet.get_gradients(optimizer, total_loss)

    # enlarge_gradients for bias
    if cfgs.MUTILPY_BIAS_GRADIENT:
        gradients = retinanet.enlarge_gradients_for_bias(gradients)

    if cfgs.GRADIENT_CLIPPING_BY_NORM:
        with tf.name_scope('clip_gradients'):
            gradients = slim.learning.clip_gradient_norms(
                gradients, cfgs.GRADIENT_CLIPPING_BY_NORM)

    # train_op
    train_op = optimizer.apply_gradients(grads_and_vars=gradients,
                                         global_step=global_step)
    summary_op = tf.summary.merge_all()
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = retinanet.get_restorer()
    saver = tf.train.Saver(max_to_keep=30)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord)

        summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
        if not os.path.exists(summary_path):
            os.makedirs(summary_path)
        summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)

        for step in range(cfgs.MAX_ITERATION):
            training_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                          time.localtime(time.time()))

            if step % cfgs.SHOW_TRAIN_INFO_INTE != 0 and step % cfgs.SMRY_ITER != 0:
                _, global_stepnp = sess.run([train_op, global_step])

            else:
                if step % cfgs.SHOW_TRAIN_INFO_INTE == 0 and step % cfgs.SMRY_ITER != 0:
                    start = time.time()

                    _, global_stepnp, img_name, reg_loss_, cls_loss_, total_loss_ = \
                        sess.run(
                            [train_op, global_step, img_name_batch, reg_loss, cls_loss, total_loss])

                    end = time.time()

                    print(""" {}: step{}    image_name:{} |\t
                              reg_loss:{} |\t cls_loss:{} |\t total_loss:{} |per_cost_time:{}s""" \
                          .format(training_time, global_stepnp, str(img_name[0]), reg_loss_, cls_loss_, total_loss_,
                                  (end - start)))
                else:
                    if step % cfgs.SMRY_ITER == 0:
                        _, global_stepnp, summary_str = sess.run(
                            [train_op, global_step, summary_op])
                        summary_writer.add_summary(summary_str, global_stepnp)
                        summary_writer.flush()

            if (step > 0 and step % cfgs.SAVE_WEIGHTS_INTE
                    == 0) or (step == cfgs.MAX_ITERATION - 1):

                save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                if not os.path.exists(save_dir):
                    os.makedirs(save_dir)

                save_ckpt = os.path.join(
                    save_dir, '{}_'.format(cfgs.DATASET_NAME) +
                    str(global_stepnp) + 'model.ckpt')
                saver.save(sess, save_ckpt)
                print('Weights have been saved to {}.'.format(save_ckpt))

        coord.request_stop()
        coord.join(threads)
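
(slim.learning.clip_gradient_norms above caps each gradient's L2 norm before
apply_gradients; a simplified sketch of that behavior, ignoring the
IndexedSlices case the real helper also handles:)

import tensorflow as tf

def clip_gradient_norms_sketch(grads_and_vars, max_norm):
    clipped = []
    for grad, var in grads_and_vars:
        if grad is not None:
            grad = tf.clip_by_norm(grad, max_norm)  # per-tensor L2 clipping
        clipped.append((grad, var))
    return clipped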
Example #13
def train():
    with tf.Graph().as_default():
        tf.set_random_seed(1234)
        with tf.name_scope('get_batch'):
            img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch = \
                next_batch(dataset_name=cfgs.DATASET_NAME,
                           batch_size=cfgs.BATCH_SIZE,
                           shortside_len=cfgs.SHORT_SIDE_LEN,
                           is_training=True,
                           is_val=False)

        with tf.name_scope('draw_gtboxes'):
            gtboxes_in_img = draw_box_with_color(
                img_batch,
                tf.reshape(gtboxes_and_label_batch, [-1, 5])[:, :-1],
                text=tf.shape(gtboxes_and_label_batch)[1])

        # ***********************************************************************************************
        # *                                         share net                                           *
        # ***********************************************************************************************
        _, share_net = get_network_byname(net_name=cfgs.NET_NAME,
                                          inputs=img_batch,
                                          num_classes=None,
                                          is_training=True,
                                          output_stride=None,
                                          global_pool=False,
                                          spatial_squeeze=False)

        # ***********************************************************************************************
        # *                                            rpn                                              *
        # ***********************************************************************************************
        rpn = build_rpn.RPN(
            net_name=cfgs.NET_NAME,
            inputs=img_batch,
            gtboxes_and_label=tf.squeeze(gtboxes_and_label_batch, 0),
            is_training=True,
            share_head=cfgs.SHARE_HEAD,
            share_net=share_net,
            stride=cfgs.STRIDE,
            anchor_ratios=cfgs.ANCHOR_RATIOS,
            anchor_scales=cfgs.ANCHOR_SCALES,
            scale_factors=cfgs.SCALE_FACTORS,
            base_anchor_size_list=cfgs.BASE_ANCHOR_SIZE_LIST,  # P2, P3, P4, P5, P6
            level=cfgs.LEVEL,
            top_k_nms=cfgs.RPN_TOP_K_NMS,
            rpn_nms_iou_threshold=cfgs.RPN_NMS_IOU_THRESHOLD,
            max_proposals_num=cfgs.MAX_PROPOSAL_NUM,
            rpn_iou_positive_threshold=cfgs.RPN_IOU_POSITIVE_THRESHOLD,
            rpn_iou_negative_threshold=cfgs.RPN_IOU_NEGATIVE_THRESHOLD,  # iou>=0.7 is positive box, iou<0.3 is negative
            rpn_mini_batch_size=cfgs.RPN_MINIBATCH_SIZE,
            rpn_positives_ratio=cfgs.RPN_POSITIVE_RATE,
            remove_outside_anchors=False,  # whether remove anchors outside
            rpn_weight_decay=cfgs.WEIGHT_DECAY[cfgs.NET_NAME])

        rpn_proposals_boxes, rpn_proposals_scores = rpn.rpn_proposals()  # rpn_score shape: [300, ]

        rpn_location_loss, rpn_classification_loss = rpn.rpn_losses()
        rpn_total_loss = rpn_classification_loss + rpn_location_loss

        with tf.name_scope('draw_proposals'):
            # score > 0.5 is object
            rpn_object_boxes_indices = tf.reshape(
                tf.where(tf.greater(rpn_proposals_scores, 0.5)), [-1])
            rpn_object_boxes = tf.gather(rpn_proposals_boxes,
                                         rpn_object_boxes_indices)

            rpn_proposals_object_boxes_in_img = draw_box_with_color(
                img_batch,
                rpn_object_boxes,
                text=tf.shape(rpn_object_boxes)[0])
            rpn_proposals_boxes_in_img = draw_box_with_color(
                img_batch,
                rpn_proposals_boxes,
                text=tf.shape(rpn_proposals_boxes)[0])
        # ***********************************************************************************************
        # *                                         Fast RCNN                                           *
        # ***********************************************************************************************

        fast_rcnn = build_fast_rcnn.FastRCNN(
            img_batch=img_batch,
            feature_pyramid=rpn.feature_pyramid,
            rpn_proposals_boxes=rpn_proposals_boxes,
            rpn_proposals_scores=rpn_proposals_scores,
            img_shape=tf.shape(img_batch),
            roi_size=cfgs.ROI_SIZE,
            roi_pool_kernel_size=cfgs.ROI_POOL_KERNEL_SIZE,
            scale_factors=cfgs.SCALE_FACTORS,
            gtboxes_and_label=tf.squeeze(gtboxes_and_label_batch, 0),
            fast_rcnn_nms_iou_threshold=cfgs.FAST_RCNN_NMS_IOU_THRESHOLD,
            fast_rcnn_maximum_boxes_per_img=100,
            fast_rcnn_nms_max_boxes_per_class=cfgs.FAST_RCNN_NMS_MAX_BOXES_PER_CLASS,
            show_detections_score_threshold=cfgs.FINAL_SCORE_THRESHOLD,  # show detections which score >= 0.6
            num_classes=cfgs.CLASS_NUM,
            fast_rcnn_minibatch_size=cfgs.FAST_RCNN_MINIBATCH_SIZE,
            fast_rcnn_positives_ratio=cfgs.FAST_RCNN_POSITIVE_RATE,
            fast_rcnn_positives_iou_threshold=cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD,  # iou>0.5 is positive, iou<0.5 is negative
            use_dropout=False,
            weight_decay=cfgs.WEIGHT_DECAY[cfgs.NET_NAME],
            is_training=True,
            level=cfgs.LEVEL)

        fast_rcnn_decode_boxes, fast_rcnn_score, num_of_objects, detection_category = \
            fast_rcnn.fast_rcnn_predict()
        fast_rcnn_location_loss, fast_rcnn_classification_loss = fast_rcnn.fast_rcnn_loss()
        fast_rcnn_total_loss = fast_rcnn_location_loss + fast_rcnn_classification_loss

        with tf.name_scope('draw_boxes_with_categories'):
            fast_rcnn_predict_boxes_in_imgs = draw_boxes_with_categories(
                img_batch=img_batch,
                boxes=fast_rcnn_decode_boxes,
                labels=detection_category,
                scores=fast_rcnn_score)

        # train
        total_loss = slim.losses.get_total_loss()

        global_step = slim.get_or_create_global_step()  # create (or fetch) the global step tensor
        #
        # lr = tf.train.piecewise_constant(global_step,
        #                                  boundaries=[np.int64(10000), np.int64(20000)],
        #                                  values=[cfgs.LR, cfgs.LR / 10, cfgs.LR / 100])
        lr = tf.train.exponential_decay(cfgs.LR,
                                        global_step,
                                        decay_steps=5000,
                                        decay_rate=1 / 2.,
                                        staircase=True)
        # lr = tf.train.piecewise_constant(global_step,
        #                                  boundaries=[np.int64(30000), np.int64(40000)],
        #                                  values=[lr, cfgs.LR/100, cfgs.LR/1000])
        tf.summary.scalar('learning_rate', lr)
        # optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)
        optimizer = tf.train.AdamOptimizer(lr,
                                           beta1=cfgs.MOMENTUM,
                                           beta2=0.999,
                                           epsilon=1e-8,
                                           use_locking=False,
                                           name='Adam')
        # optimizer = tf.train.RMSPropOptimizer(lr, decay=0.9, epsilon=1e-6, name='RMSProp')
        # create an operation that computes the gradients and returns the loss
        train_op = slim.learning.create_train_op(
            total_loss, optimizer, global_step)  # rpn_total_loss,
        # train_op = optimizer.minimize(second_classification_loss, global_step)

        # ***********************************************************************************************
        # *                                          Summary                                            *
        # ***********************************************************************************************
        # ground truth and predict
        tf.summary.image('img/gtboxes', gtboxes_in_img)
        tf.summary.image('img/faster_rcnn_predict',
                         fast_rcnn_predict_boxes_in_imgs)
        # rpn loss and image
        tf.summary.scalar('rpn/rpn_location_loss', rpn_location_loss)
        tf.summary.scalar('rpn/rpn_classification_loss',
                          rpn_classification_loss)
        tf.summary.scalar('rpn/rpn_total_loss', rpn_total_loss)

        tf.summary.scalar('fast_rcnn/fast_rcnn_location_loss',
                          fast_rcnn_location_loss)
        tf.summary.scalar('fast_rcnn/fast_rcnn_classification_loss',
                          fast_rcnn_classification_loss)
        tf.summary.scalar('fast_rcnn/fast_rcnn_total_loss',
                          fast_rcnn_total_loss)

        tf.summary.scalar('loss/total_loss', total_loss)
        # #
        # tf.summary.image('C2', _concact_features(share_net['resnet_v1_50/block1/unit_2/bottleneck_v1'][:, :, :, 0:16]), 1)
        # tf.summary.image('C3', _concact_features(share_net['resnet_v1_50/block2/unit_3/bottleneck_v1'][:, :, :, 0:16]), 1)
        # tf.summary.image('C4', _concact_features(share_net['resnet_v1_50/block3/unit_5/bottleneck_v1'][:, :, :, 0:16]), 1)
        # tf.summary.image('C5', _concact_features(share_net['resnet_v1_50/block4'][:, :, :, 0:16]), 1)
        # tf.summary.image('P2', _concact_features(rpn.feature_pyramid['P2'][:, :, :, 0:16]),1)
        # tf.summary.image('P3', _concact_features(rpn.feature_pyramid['P3'][:, :, :, 0:16]),1)
        # tf.summary.image('P4', _concact_features(rpn.feature_pyramid['P4'][:, :, :, 0:16]),1)
        # tf.summary.image('P5', _concact_features(rpn.feature_pyramid['P5'][:, :, :, 0:16]), 1)
        # tf.summary.image('rpn/rpn_all_boxes', rpn_proposals_boxes_in_img)
        # tf.summary.image('rpn/rpn_object_boxes', rpn_proposals_object_boxes_in_img)
        # learning_rate
        # tf.summary.scalar('learning_rate', lr)

        summary_op = tf.summary.merge_all()
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        restorer, restore_ckpt = restore_model.get_restorer()

        saver = tf.train.Saver(max_to_keep=16)

        config = tf.ConfigProto()
        # config.gpu_options.per_process_gpu_memory_fraction = 0.5
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:

            if cfgs.NET_NAME == 'pvanet':
                sess.run(init_op)
                coord = tf.train.Coordinator()
                threads = tf.train.start_queue_runners(sess, coord)
                start = 0
                if restorer is not None:
                    restorer.restore(sess, restore_ckpt)
                    print('restore model')
                    start = int("".join(
                        list(restore_ckpt.split('/')[-1])[4:8])) + 1
                else:
                    # read_npy.load_initial_weights(sess)
                    read_npy.load_ckpt_weights(sess)
            else:

                sess.run(init_op)
                # print(sess.run('resnet_v1_50/block4/unit_3/bottleneck_v1/conv3/BatchNorm/moving_variance'))
                # print(sess.run('vgg_16/block4/unit_3/bottleneck_v1/conv3/BatchNorm/moving_variance'))
                coord = tf.train.Coordinator()
                threads = tf.train.start_queue_runners(sess, coord)
                start = 0
                if restorer is not None:
                    restorer.restore(sess, restore_ckpt)
                    print('restore model')
                    # start = int("".join(list(restore_ckpt.split('/')[-1])[4:8]))+1

            summary_path = os.path.join(FLAGS.summary_path, cfgs.VERSION)
            mkdir(summary_path)
            summary_writer = tf.summary.FileWriter(summary_path,
                                                   graph=sess.graph)
            df = pd.DataFrame(
                [],
                columns=['Recall', 'Precision', 'mAP', 'F1_score'],
                index=[])

            for step in range(0, cfgs.MAX_ITERATION):
                # print(img_name_batch.eval())
                training_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                              time.localtime(time.time()))
                start = time.time()

                _global_step, _img_name_batch, _rpn_location_loss, _rpn_classification_loss, \
                _rpn_total_loss, _fast_rcnn_location_loss, _fast_rcnn_classification_loss, \
                _fast_rcnn_total_loss, _total_loss, _ = \
                    sess.run([global_step, img_name_batch, rpn_location_loss, rpn_classification_loss,
                              rpn_total_loss, fast_rcnn_location_loss, fast_rcnn_classification_loss,
                              fast_rcnn_total_loss, total_loss, train_op])

                end = time.time()
                # if step == 100:
                #     save_dir = os.path.join(FLAGS.trained_checkpoint, cfgs.VERSION)
                #     mkdir(save_dir)
                #
                #     save_ckpt = os.path.join(save_dir, 'voc_' + str(_global_step) + 'model.ckpt')
                #     saver.save(sess, save_ckpt)
                #     print(' weights had been saved')
                # if step == 500:
                #     save_dir = os.path.join(FLAGS.trained_checkpoint, cfgs.VERSION)
                #     mkdir(save_dir)
                #
                #     save_ckpt = os.path.join(save_dir, 'voc_' + str(_global_step) + 'model.ckpt')
                #     saver.save(sess, save_ckpt)
                #     print(' weights had been saved')
                if step % 50 == 0:
                    print(""" {}: step{}    image_name:{} |\t
                                rpn_loc_loss:{} |\t rpn_cla_loss:{} |\t rpn_total_loss:{} |
                                fast_rcnn_loc_loss:{} |\t fast_rcnn_cla_loss:{} |\t fast_rcnn_total_loss:{} |
                                total_loss:{} |\t pre_cost_time:{}s""" \
                          .format(training_time, _global_step, str(_img_name_batch[0]), _rpn_location_loss,
                                  _rpn_classification_loss, _rpn_total_loss, _fast_rcnn_location_loss,
                                  _fast_rcnn_classification_loss, _fast_rcnn_total_loss, _total_loss,
                                  (end - start)))

                if step % 250 == 0:
                    summary_str = sess.run(summary_op)
                    summary_writer.add_summary(summary_str, _global_step)
                    summary_writer.flush()

                if (step > 0 and step % 2000 == 0) or step == 1000 or step == cfgs.MAX_ITERATION - 1:
                    save_dir = os.path.join(FLAGS.trained_checkpoint,
                                            cfgs.VERSION)
                    mkdir(save_dir)

                    save_ckpt = os.path.join(
                        save_dir, 'voc_' + str(_global_step) + 'model.ckpt')
                    saver.save(sess, save_ckpt)
                    print('weights have been saved')
                # save validation-set metrics
                if (step > 0 and step % 2000 == 0) or step == cfgs.MAX_ITERATION - 1:
                    save_excel = os.path.join(os.path.abspath('..'), 'Loss',
                                              cfgs.NET_NAME + '_' + cfgs.VERSION)
                    mkdir(save_excel)

                    # valval.val(is_val=True) evaluates the current weights on the validation
                    # set and returns [Recall, Precision, mAP, F1_score]
                    df2 = pd.DataFrame(
                        [valval.val(is_val=True)],
                        columns=['Recall', 'Precision', 'mAP', 'F1_score'],
                        index=[str(step)])
                    df = df.append(df2)

                    df.to_excel(os.path.join(save_excel, 'validation.xls'))
                    print('validation results have been saved')

            coord.request_stop()
            coord.join(threads)
Example #14
def train():

    # Step 1:
    # clw note: pass in the backbone name (e.g. resnet_v1), the is_training flag, and the number
    #           of anchor boxes per location, and build the base network.
    faster_rcnn = build_whole_network.DetectionNetwork(
        base_network_name=cfgs.NET_NAME, is_training=True)
    with tf.name_scope('get_batch'):
        # clw note: tf.name_scope is mainly used together with tf.Variable() to keep parameter
        #           names organized (ops created inside get the 'get_batch/' name prefix).
        # clw note: read and assemble this batch from the file/memory queues.
        #           Only batch_size=1 is supported for now, i.e. exactly one image is read per
        #           step. The returned tensors are: the image name, the image tensor, the
        #           ground-truth boxes with their labels, and the number of objects in the
        #           image, each structured as [batch, per-image info].
        img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch = \
            next_batch(dataset_name=cfgs.DATASET_NAME,  # 'pascal', 'coco'
                       batch_size=cfgs.BATCH_SIZE,
                       shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                       is_training=True)
        gtboxes_and_label = tf.reshape(gtboxes_and_label_batch, [-1, 5])
        # clw note: the number of objects m is unknown in advance, but each one has 4 gtbox
        #           coordinates plus 1 label (5 values), so -1 lets TF infer m automatically.
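        # For example, an image with 3 ground-truth objects becomes a [3, 5] tensor, one row per
        # object in an assumed [xmin, ymin, xmax, ymax, label] layout (the exact coordinate
        # order depends on the dataset reader).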

    biases_regularizer = tf.no_regularizer
    weights_regularizer = tf.contrib.layers.l2_regularizer(cfgs.WEIGHT_DECAY)

    # Step 2:
    # clw note: build the Faster R-CNN network!

    # First, look at the declaration of the arg_scope function used below:
    # @tf_contextlib.contextmanager
    # def arg_scope(list_ops_or_scope, **kwargs): it applies default values to everything in
    # list_ops, i.e. every element of the list is configured with the **kwargs parameters.
    # arg_scope is wrapped with the decorator @tf_contextlib.contextmanager: the @ is followed
    # by a callable that runs a series of auxiliary operations around the decorated function.
    # A quick demo:
    #########################################
    # import time
    # def my_time(func):
    #     print(time.ctime())
    #     return func()
    #
    # @my_time  # so @my_time is equivalent to my_time(xxx()), but mind Python's execution order
    # def xxx():
    #     print('Hello world!')
    #
    # Output:
    # Wed Jul 26 23:01:21 2017
    # Hello world!
    ##########################################
    # In this demo, xxx does the main work (printing Hello world), and the decorator @my_time
    # adds the auxiliary step of printing the time: calling the decorated object invokes
    # my_time, which takes xxx as an argument, prints the time, then runs xxx.
    # Details: https://www.cnblogs.com/zzy-tf/p/9356883.html

    # Another demo:
    ##########################################
    # with slim.arg_scope(
    #                 [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],stride = 1, padding = 'VALID'):
    #             net = slim.conv2d(inputs, 32, [3, 3], stride = 2, scope = 'Conv2d_1a_3x3')
    #             net = slim.conv2d(net, 32, [3, 3], scope = 'Conv2d_2a_3x3')
    #             net = slim.conv2d(net, 64, [3, 3], padding = 'SAME', scope = 'Conv2d_2b_3x3')
    # So inside the scope, slim.conv2d() etc. can be called with the defaults already in place:
    # unless declared otherwise, slim.conv2d, slim.max_pool2d and slim.avg_pool2d all default to
    # stride 1 and 'VALID' padding here. Individual calls can still override the defaults (as
    # stride=2 and padding='SAME' do above); setting them once is just less tedious than
    # repeating them on every call, which saves real time when building deeper networks.
    with slim.arg_scope([
            slim.conv2d, slim.conv2d_in_plane, slim.conv2d_transpose,
            slim.separable_conv2d, slim.fully_connected
    ],
                        weights_regularizer=weights_regularizer,
                        biases_regularizer=biases_regularizer,
                        biases_initializer=tf.constant_initializer(
                            0.0)):  # list as many types of layers as possible,
        # even if they are not used now

        # build_whole_detection_network builds the whole architecture: backbone, RPN, the
        # pooling layer, and the downstream heads.
        # Returns: the network's final predicted boxes, their predicted categories and scores,
        # plus the RPN and overall network losses, all collected into a single dict.
        final_bbox, final_scores, final_category, loss_dict = faster_rcnn.build_whole_detection_network(
            input_img_batch=img_batch, gtboxes_batch=gtboxes_and_label)

    # ----------------------------------------------------------------------------------------------------build loss
    # weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses())
    # weight_decay_loss = tf.add_n(tf.losses.get_regularization_losses())
    rpn_location_loss = loss_dict['rpn_loc_loss']
    rpn_cls_loss = loss_dict['rpn_cls_loss']
    rpn_total_loss = rpn_location_loss + rpn_cls_loss

    fastrcnn_cls_loss = loss_dict['fastrcnn_cls_loss']
    fastrcnn_loc_loss = loss_dict['fastrcnn_loc_loss']
    fastrcnn_total_loss = fastrcnn_cls_loss + fastrcnn_loc_loss

    # clw note: following the formula in the paper, training simply minimizes the sum of the
    #           RPN (classification + regression) loss and the Fast R-CNN (classification +
    #           regression) loss.
    total_loss = rpn_total_loss + fastrcnn_total_loss
    # ____________________________________________________________________________________________________build loss

    # ---------------------------------------------------------------------------------------------------add summary
    tf.summary.scalar('RPN_LOSS/cls_loss', rpn_cls_loss)
    tf.summary.scalar('RPN_LOSS/location_loss', rpn_location_loss)
    tf.summary.scalar('RPN_LOSS/rpn_total_loss', rpn_total_loss)

    tf.summary.scalar('FAST_LOSS/fastrcnn_cls_loss', fastrcnn_cls_loss)
    tf.summary.scalar('FAST_LOSS/fastrcnn_location_loss', fastrcnn_loc_loss)
    tf.summary.scalar('FAST_LOSS/fastrcnn_total_loss', fastrcnn_total_loss)

    tf.summary.scalar('LOSS/total_loss', total_loss)
    # tf.summary.scalar('LOSS/regular_weights', weight_decay_loss)

    gtboxes_in_img = show_box_in_tensor.draw_boxes_with_categories(
        img_batch=img_batch,
        boxes=gtboxes_and_label[:, :-1],
        labels=gtboxes_and_label[:, -1])
    if cfgs.ADD_BOX_IN_TENSORBOARD:
        detections_in_img = show_box_in_tensor.draw_boxes_with_categories_and_scores(
            img_batch=img_batch,
            boxes=final_bbox,
            labels=final_category,
            scores=final_scores)
        tf.summary.image('Compare/final_detection', detections_in_img)
    tf.summary.image('Compare/gtboxes', gtboxes_in_img)

    # ___________________________________________________________________________________________________add summary

    global_step = slim.get_or_create_global_step()
    lr = tf.train.piecewise_constant(
        global_step,
        boundaries=[
            np.int64(cfgs.DECAY_STEP[0]),
            np.int64(cfgs.DECAY_STEP[1])
        ],
        values=[cfgs.LR, cfgs.LR / 10., cfgs.LR / 100.])
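    # For example, with illustrative values cfgs.LR = 1e-3 and cfgs.DECAY_STEP = [60000, 80000],
    # lr stays at 1e-3 until step 60000, drops to 1e-4 until step 80000, then to 1e-5 afterwards.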
    tf.summary.scalar('lr', lr)
    optimizer = tf.train.MomentumOptimizer(
        lr, momentum=cfgs.MOMENTUM)  # clw note: optimizer choice; others are worth trying,
    # e.g. tf.train.AdamOptimizer(1e-4).minimize(total_loss)

    # ---------------------------------------------------------------------------------------------compute gradients

    # clw note: a few notes on why minimize() is not called on the optimizer above:
    # minimize() not only computes the gradients but also applies them to the variables.
    # To process the gradients your own way instead, follow these steps:
    # 1. compute them with compute_gradients() -- the get_gradients() call below is just
    #    optimizer.compute_gradients(loss);
    # 2. transform the gradients however you need;
    # 3. apply the processed gradients with apply_gradients().
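    # A minimal sketch of that three-step pattern, in the same commented-demo style as above
    # (an illustration only; the real code below uses faster_rcnn.get_gradients and
    # slim.learning.clip_gradient_norms rather than this hand-rolled clipping):
    ##########################################
    # opt = tf.train.MomentumOptimizer(learning_rate=0.01, momentum=0.9)
    # grads_and_vars = opt.compute_gradients(total_loss)                # step 1: compute
    # grads_and_vars = [(tf.clip_by_norm(g, 10.0), v)                   # step 2: transform
    #                   for g, v in grads_and_vars if g is not None]
    # train_op = opt.apply_gradients(grads_and_vars,                    # step 3: apply
    #                                global_step=global_step)
    ##########################################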

    gradients = faster_rcnn.get_gradients(optimizer, total_loss)

    # enlarge_gradients for bias
    if cfgs.MUTILPY_BIAS_GRADIENT:
        gradients = faster_rcnn.enlarge_gradients_for_bias(gradients)

    # clw note: clipping by norm caps each gradient's maximum norm, a commonly used
    #           regularization trick to guard against exploding gradients.
    if cfgs.GRADIENT_CLIPPING_BY_NORM:
        with tf.name_scope('clip_gradients_YJR'):
            gradients = slim.learning.clip_gradient_norms(
                gradients, cfgs.GRADIENT_CLIPPING_BY_NORM)
    # _____________________________________________________________________________________________compute gradients

    # train_op
    train_op = optimizer.apply_gradients(grads_and_vars=gradients,
                                         global_step=global_step)
    summary_op = tf.summary.merge_all()
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = faster_rcnn.get_restorer()
    saver = tf.train.Saver(max_to_keep=30)
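    # max_to_keep=30 makes tf.train.Saver keep only the 30 most recent checkpoints on disk,
    # deleting older ones automatically.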

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('model restored')
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord)

        summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
        if not os.path.exists(summary_path):
            os.makedirs(summary_path)
        summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)

        for step in range(cfgs.MAX_ITERATION):
            training_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                          time.localtime(time.time()))

            if step % cfgs.SHOW_TRAIN_INFO_INTE != 0 and step % cfgs.SMRY_ITER != 0:
                _, global_stepnp = sess.run([train_op, global_step])

            elif step % cfgs.SHOW_TRAIN_INFO_INTE == 0 and step % cfgs.SMRY_ITER != 0:
                start = time.time()

                _, global_stepnp, img_name, rpnLocLoss, rpnClsLoss, rpnTotalLoss, \
                fastrcnnLocLoss, fastrcnnClsLoss, fastrcnnTotalLoss, totalLoss = \
                    sess.run(
                        [train_op, global_step, img_name_batch, rpn_location_loss, rpn_cls_loss, rpn_total_loss,
                         fastrcnn_loc_loss, fastrcnn_cls_loss, fastrcnn_total_loss, total_loss])

                end = time.time()
                print(""" {}: step{}    image_name:{} |\t
                          rpn_loc_loss:{} |\t rpn_cla_loss:{} |\t rpn_total_loss:{} |
                          fast_rcnn_loc_loss:{} |\t fast_rcnn_cla_loss:{} |\t fast_rcnn_total_loss:{} |
                          total_loss:{} |\t per_cost_time:{}s""" \
                      .format(training_time, global_stepnp, str(img_name[0]), rpnLocLoss, rpnClsLoss,
                              rpnTotalLoss, fastrcnnLocLoss, fastrcnnClsLoss, fastrcnnTotalLoss, totalLoss,
                              (end - start)))
            else:  # step % cfgs.SMRY_ITER == 0: also write TensorBoard summaries
                _, global_stepnp, summary_str = sess.run(
                    [train_op, global_step, summary_op])
                summary_writer.add_summary(summary_str, global_stepnp)
                summary_writer.flush()

            if (step > 0 and step % cfgs.SAVE_WEIGHTS_INTE
                    == 0) or (step == cfgs.MAX_ITERATION - 1):

                save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                if not os.path.exists(save_dir):
                    os.makedirs(save_dir)

                save_ckpt = os.path.join(
                    save_dir, 'voc_' + str(global_stepnp) + 'model.ckpt')
                saver.save(sess, save_ckpt)
                print('weights have been saved')

        coord.request_stop()
        coord.join(threads)
Example #15
File: train.py  Project: cugszu/yuncong
def train():

    faster_rcnn = build_whole_network.DetectionNetwork(
        base_network_name=cfgs.NET_NAME, is_training=True)

    with tf.name_scope('get_batch'):
        img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch = \
            next_batch(dataset_name=cfgs.DATASET_NAME,  # 'pascal', 'coco'
                       batch_size=cfgs.BATCH_SIZE,
                       shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                       is_training=True)
        gtboxes_and_label = tf.reshape(gtboxes_and_label_batch, [-1, 5])

    biases_regularizer = tf.no_regularizer
    weights_regularizer = tf.contrib.layers.l2_regularizer(cfgs.WEIGHT_DECAY)

    # list as many types of layers as possible, even if they are not used now
    with slim.arg_scope([
            slim.conv2d, slim.conv2d_in_plane, slim.conv2d_transpose,
            slim.separable_conv2d, slim.fully_connected
    ],
                        weights_regularizer=weights_regularizer,
                        biases_regularizer=biases_regularizer,
                        biases_initializer=tf.constant_initializer(0.0)):
        result_dict, losses_dict = faster_rcnn.build_whole_detection_network(
            input_img_batch=img_batch, gtboxes_batch=gtboxes_and_label)
    # ----------------------------------------------------------------------------------------------------build loss
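    # The losses below come from three SSH-style detection modules (M1/M2/M3, each responsible
    # for a different object scale); each module contributes a bbox-regression loss and a
    # classification loss, and all six are summed with the weight-decay term into total_loss.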
    weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses())
    # weight_decay_loss = tf.add_n(tf.losses.get_regularization_losses())

    bbox_loss_m1 = losses_dict['bbox_loss_m1']
    cls_loss_m1 = losses_dict['cls_loss_m1']
    total_loss_m1 = bbox_loss_m1 + cls_loss_m1

    bbox_loss_m2 = losses_dict['bbox_loss_m2']
    cls_loss_m2 = losses_dict['cls_loss_m2']
    total_loss_m2 = bbox_loss_m2 + cls_loss_m2

    bbox_loss_m3 = losses_dict['bbox_loss_m3']
    cls_loss_m3 = losses_dict['cls_loss_m3']
    total_loss_m3 = bbox_loss_m3 + cls_loss_m3

    total_loss = total_loss_m1 + total_loss_m2 + total_loss_m3 + weight_decay_loss

    # ---------------------------------------------------------------------------------------------------add summary
    tf.summary.scalar('SSH_M1_LOSS/cls_loss_m1', cls_loss_m1)
    tf.summary.scalar('SSH_M1_LOSS/bbox_loss_m1', bbox_loss_m1)
    tf.summary.scalar('SSH_M1_LOSS/total_loss_m1', total_loss_m1)

    tf.summary.scalar('SSH_M2_LOSS/cls_loss_m2', cls_loss_m2)
    tf.summary.scalar('SSH_M2_LOSS/bbox_loss_m2', bbox_loss_m2)
    tf.summary.scalar('SSH_M2_LOSS/total_loss_m2', total_loss_m2)

    tf.summary.scalar('SSH_M3_LOSS/cls_loss_m3', cls_loss_m3)
    tf.summary.scalar('SSH_M3_LOSS/bbox_loss_m3', bbox_loss_m3)
    tf.summary.scalar('SSH_M3_LOSS/total_loss_m3', total_loss_m3)

    tf.summary.scalar('LOSS/total_loss', total_loss)
    tf.summary.scalar('LOSS/regular_weights', weight_decay_loss)

    gtboxes_in_img = show_box_in_tensor.draw_boxes_with_categories(
        img_batch=img_batch,
        boxes=gtboxes_and_label[:, :-1],
        labels=gtboxes_and_label[:, -1])
    if cfgs.ADD_BOX_IN_TENSORBOARD:

        detections_in_img_m1 = \
            show_box_in_tensor.draw_boxes_with_categories_and_scores(img_batch=img_batch,
                                                                     boxes=result_dict['final_bbox_m1'],
                                                                     labels=result_dict['final_category_m1'],
                                                                     scores=result_dict['final_scores_m1'])
        tf.summary.image('Compare/final_detection_m1', detections_in_img_m1)

        detections_in_img_m2 = \
            show_box_in_tensor.draw_boxes_with_categories_and_scores(img_batch=img_batch,
                                                                     boxes=result_dict['final_bbox_m2'],
                                                                     labels=result_dict['final_category_m2'],
                                                                     scores=result_dict['final_scores_m2'])
        tf.summary.image('Compare/final_detection_m2', detections_in_img_m2)

        detections_in_img_m3 = \
            show_box_in_tensor.draw_boxes_with_categories_and_scores(img_batch=img_batch,
                                                                     boxes=result_dict['final_bbox_m3'],
                                                                     labels=result_dict['final_category_m3'],
                                                                     scores=result_dict['final_scores_m3'])
        tf.summary.image('Compare/final_detection_m3', detections_in_img_m3)

    tf.summary.image('Compare/gtboxes', gtboxes_in_img)

    global_step = slim.get_or_create_global_step()
    lr = tf.train.piecewise_constant(
        global_step,
        boundaries=[
            np.int64(cfgs.DECAY_STEP[0]),
            np.int64(cfgs.DECAY_STEP[1])
        ],
        values=[cfgs.LR, cfgs.LR / 10., cfgs.LR / 100.])
    tf.summary.scalar('lr', lr)
    optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)

    # ---------------------------------------------------------------------------------------------compute gradients
    gradients = faster_rcnn.get_gradients(optimizer, total_loss)

    # enlarge_gradients for bias
    if cfgs.MUTILPY_BIAS_GRADIENT:
        gradients = faster_rcnn.enlarge_gradients_for_bias(gradients)

    if cfgs.GRADIENT_CLIPPING_BY_NORM:
        with tf.name_scope('clip_gradients'):
            gradients = slim.learning.clip_gradient_norms(
                gradients, cfgs.GRADIENT_CLIPPING_BY_NORM)

    # train_op
    train_op = optimizer.apply_gradients(grads_and_vars=gradients,
                                         global_step=global_step)
    summary_op = tf.summary.merge_all()
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = faster_rcnn.get_restorer()
    saver = tf.train.Saver(max_to_keep=30)

    config = tf.ConfigProto()
    # config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('model restored')
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord)

        summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)

        if not os.path.exists(summary_path):
            os.makedirs(summary_path)
        summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)

        for step in range(cfgs.MAX_ITERATION):

            training_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                          time.localtime(time.time()))


            if step % cfgs.SHOW_TRAIN_INFO_INTE != 0 and step % cfgs.SMRY_ITER != 0:
                _, global_stepnp = sess.run([train_op, global_step])

            elif step % cfgs.SHOW_TRAIN_INFO_INTE == 0 and step % cfgs.SMRY_ITER != 0:
                start = time.time()

                _, global_stepnp, img_name, totalLoss = \
                    sess.run(
                        [train_op, global_step, img_name_batch, total_loss])

                end = time.time()
                print(""" {}: step{}    image_name:{} |\t total_loss:{} |\t per_cost_time:{}s""" \
                      .format(training_time, global_stepnp, str(img_name[0]), totalLoss,
                              (end - start)))
            else:  # step % cfgs.SMRY_ITER == 0: also write TensorBoard summaries
                _, global_stepnp, summary_str = sess.run(
                    [train_op, global_step, summary_op])
                summary_writer.add_summary(summary_str, global_stepnp)
                summary_writer.flush()

            if (step > 0 and step % cfgs.SAVE_WEIGHTS_INTE
                    == 0) or (step == cfgs.MAX_ITERATION - 1):

                save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                if not os.path.exists(save_dir):
                    os.makedirs(save_dir)

                save_ckpt = os.path.join(
                    save_dir, 'voc_' + str(global_stepnp) + 'model.ckpt')
                saver.save(sess, save_ckpt)
                print('weights have been saved')

        coord.request_stop()
        coord.join(threads)