Example No. 1
def input_fn():
     # Only a batch size of 1 is supported during evaluation.
     assert batch_size == 1, 'Only a batch size of 1 is supported during evaluation.'
     target_shape = [FLAGS.train_image_size] * 2
     image_preprocessing_fn = \
         lambda image_, labels_, bboxes_, quadrilaterals_:\
         textboxes_plusplus_preprocessing.preprocess_image(
             image_,
             labels_,
             bboxes_,
             quadrilaterals_,
             target_shape,
             is_training=is_training,
             data_format=FLAGS.data_format,
             output_rgb=False)
     image, file_name, shape, output_shape = \
         dataset_common.slim_get_batch(
             FLAGS.num_classes,
             batch_size,
             ('train' if is_training else 'val'),
             os.path.join(FLAGS.data_dir, dataset_pattern),
             FLAGS.num_readers,
             FLAGS.num_preprocessing_threads,
             image_preprocessing_fn,
             anchor_encoding_fn=None,
             num_epochs=1,
             is_training=is_training)
     return {
         'image': image,
         'file_name': file_name,
         'shape': shape,
         'output_shape': output_shape
     }, None
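The dictionary-plus-None return value matches the (features, labels) contract of the TensorFlow 1.x Estimator API. Below is a minimal sketch of how an input_fn like this is typically wired in for evaluation; ssd_model_fn and FLAGS.model_dir are hypothetical names assumed for illustration, not part of the example above.

import tensorflow as tf

# Hypothetical wiring; ssd_model_fn is an assumption, not shown in the example.
estimator = tf.estimator.Estimator(model_fn=ssd_model_fn,
                                   model_dir=FLAGS.model_dir)
# Each step receives ({'image': ..., 'file_name': ..., 'shape': ...,
#  'output_shape': ...}, None) from input_fn, matching (features, labels).
predictions = estimator.predict(input_fn=input_fn)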
Example No. 2
    def input_fn():
        target_shape = [FLAGS.train_image_size] * 2
        image_preprocessing_fn = lambda image_, label_: cls_preprocessing.preprocess_image(
            image_, label_, target_shape,
            is_training=is_training, data_format=FLAGS.data_format,
            output_rgb=False)
        if FLAGS.location_feature_stage is not None:
            _batch_size = batch_size // 2
            print('Using both cls and reg tasks, so the batch size for each task is {}'.format(_batch_size))
            image1, _, cls_targets1, points1, is_reg1 = dataset_common.slim_get_batch(
                                                            FLAGS.num_classes,
                                                            _batch_size,
                                                            ('train' if is_training else 'val'),
                                                            os.path.join(FLAGS.data_dir, FLAGS.cls_data_dir, dataset_pattern),
                                                            int(FLAGS.num_readers/2),
                                                            int(FLAGS.num_preprocessing_threads/2),
                                                            image_preprocessing_fn,
                                                            num_epochs=FLAGS.train_epochs,
                                                            is_training=is_training)
            image2, _, cls_targets2, points2, is_reg2 = dataset_common.slim_get_batch(
                                                            FLAGS.num_classes,
                                                            _batch_size,
                                                            ('train' if is_training else 'val'),
                                                            os.path.join(FLAGS.data_dir, FLAGS.reg_data_dir, dataset_pattern),
                                                            int(FLAGS.num_readers/2),
                                                            int(FLAGS.num_preprocessing_threads/2),
                                                            image_preprocessing_fn,
                                                            num_epochs=FLAGS.train_epochs,
                                                            is_training=is_training)
            image = tf.concat([image1, image2], axis=0)
            cls_targets = tf.concat([cls_targets1, cls_targets2], axis=0)
            points = tf.concat([points1, points2], axis=0)
            is_reg = tf.concat([is_reg1, is_reg2], axis=0)
        else:
            _batch_size = batch_size
            image, _, cls_targets, points, is_reg = dataset_common.slim_get_batch(
                                                            FLAGS.num_classes,
                                                            _batch_size,
                                                            ('train' if is_training else 'val'),
                                                            os.path.join(FLAGS.data_dir, dataset_pattern),
                                                            FLAGS.num_readers,
                                                            FLAGS.num_preprocessing_threads,
                                                            image_preprocessing_fn,
                                                            num_epochs=FLAGS.train_epochs,
                                                            is_training=is_training)

        return image, {'cls_targets': cls_targets, 'loc_targets': points, 'is_reg': is_reg}
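One caveat worth noting: splitting with batch_size // 2 truncates, so an odd batch_size would silently produce a combined batch one sample short. A minimal guard, assuming the same surrounding scope (this check is not in the original):

# Sketch only: fail fast on odd batch sizes before splitting across the two tasks.
assert batch_size % 2 == 0, \
    'batch_size must be even when location_feature_stage is set, got {}'.format(batch_size)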
Example No. 3
def input_fn(dataset_pattern='val-*', batch_size=1, data_location=None):
    out_shape = [SSD_VGG16_IMAGE_SIZE] * 2
    anchor_creator = anchor_manipulator.AnchorCreator(
        out_shape,
        layers_shapes=[(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)],
        anchor_scales=[(0.1, ), (0.2, ), (0.375, ), (0.55, ), (0.725, ),
                       (0.9, )],
        extra_anchor_scales=[(0.1414, ), (0.2739, ), (0.4541, ), (0.6315, ),
                             (0.8078, ), (0.9836, )],
        anchor_ratios=[(1., 2., .5), (1., 2., 3., .5, 0.3333),
                       (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333),
                       (1., 2., .5), (1., 2., .5)],
        layer_steps=[8, 16, 32, 64, 100, 300])
    all_anchors, all_num_anchors_depth, all_num_anchors_spatial = anchor_creator.get_all_anchors(
    )

    num_anchors_per_layer = []
    for ind in range(len(all_anchors)):
        num_anchors_per_layer.append(all_num_anchors_depth[ind] *
                                     all_num_anchors_spatial[ind])

    anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(
        allowed_borders=[1.0] * 6,
        positive_threshold=MATCH_THRESHOLD,
        ignore_threshold=NEG_THRESHOLD,
        prior_scaling=[0.1, 0.1, 0.2, 0.2])

    def image_preprocessing_fn(image_, labels_, bboxes_):
        return ssd_preprocessing.preprocess_image(image_,
                                                  labels_,
                                                  bboxes_,
                                                  out_shape,
                                                  is_training=False,
                                                  data_format=DATA_FORMAT,
                                                  output_rgb=False)

    def anchor_encoder_fn(glabels_, gbboxes_):
        return anchor_encoder_decoder.encode_all_anchors(
            glabels_, gbboxes_, all_anchors, all_num_anchors_depth,
            all_num_anchors_spatial)

    image, filename, shape, loc_targets, cls_targets, match_scores = \
        dataset_common.slim_get_batch(NUM_CLASSES,
                                      batch_size,
                                      'val',
                                      os.path.join(
                                          data_location,
                                          dataset_pattern),
                                      NUM_READERS,
                                      NUM_PREPROCESSING_THREADS,
                                      image_preprocessing_fn,
                                      anchor_encoder_fn,
                                      num_epochs=1,
                                      is_training=False)
    return image, filename, shape
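As a worked check of the arithmetic behind num_anchors_per_layer: assuming this AnchorCreator follows the standard SSD300 convention, where each layer's depth is len(anchor_scales) * len(anchor_ratios) + len(extra_anchor_scales), the layout above yields the familiar 8732 anchors.

# Depths implied by the parameters above: 1*3+1, 1*5+1, 1*5+1, 1*5+1, 1*3+1, 1*3+1
depths = [4, 6, 6, 6, 4, 4]
shapes = [(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)]
print(sum(d * h * w for d, (h, w) in zip(depths, shapes)))  # 8732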
Example No. 4
    def input_fn():
        target_shape = [FLAGS.train_image_size] * 2

        anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(positive_threshold = FLAGS.match_threshold,
                                                        ignore_threshold = FLAGS.neg_threshold,
                                                        prior_scaling=[0.1, 0.1, 0.2, 0.2])

        all_anchor_scales = [(16.,), (32.,), (64.,), (128.,), (256.,), (512.,)]
        all_extra_scales = [(), (), (), (), (), ()]
        all_anchor_ratios = [(1.,), (1.,), (1.,), (1.,), (1.,), (1.,)]
        all_layer_shapes = [(160, 160), (80, 80), (40, 40), (20, 20), (10, 10), (5, 5)]
        all_layer_strides = [4, 8, 16, 32, 64, 128]
        offset_list = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
        total_layers = len(all_layer_shapes)
        anchors_height = list()
        anchors_width = list()
        anchors_depth = list()
        for ind in range(total_layers):
            _anchors_height, _anchors_width, _anchor_depth = anchor_encoder_decoder.get_anchors_width_height(all_anchor_scales[ind], all_extra_scales[ind], all_anchor_ratios[ind], name='get_anchors_width_height{}'.format(ind))
            anchors_height.append(_anchors_height)
            anchors_width.append(_anchors_width)
            anchors_depth.append(_anchor_depth)
        anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax, inside_mask = anchor_encoder_decoder.get_all_anchors(target_shape, anchors_height, anchors_width, anchors_depth,
                                                                        offset_list, all_layer_shapes, all_layer_strides,
                                                                        [FLAGS.train_image_size * 1.] * total_layers, [False] * total_layers)

        num_anchors_per_layer = list()
        for ind, layer_shape in enumerate(all_layer_shapes):
            _, _num_anchors_per_layer = anchor_encoder_decoder.get_anchors_count(anchors_depth[ind], layer_shape, name='get_anchor_count{}'.format(ind))
            num_anchors_per_layer.append(_num_anchors_per_layer)

        image_preprocessing_fn = lambda image_, bboxes_ : sfd_preprocessing.preprocess_image(image_, bboxes_, target_shape, is_training=is_training, data_format=FLAGS.data_format, output_rgb=False)
        anchor_encoder_fn = lambda gbboxes_: anchor_encoder_decoder.encode_anchors(gbboxes_, anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax, inside_mask, match_mining=True)

        image, filename, shape, loc_targets, cls_targets, match_scores, _ = dataset_common.slim_get_batch(FLAGS.num_classes,
                                                                                batch_size,
                                                                                ('train' if is_training else 'valid'),
                                                                                os.path.join(FLAGS.data_dir, dataset_pattern),
                                                                                FLAGS.num_readers,
                                                                                FLAGS.num_preprocessing_threads,
                                                                                image_preprocessing_fn,
                                                                                anchor_encoder_fn,
                                                                                num_epochs=FLAGS.train_epochs,
                                                                                is_training=is_training)
        global global_anchor_info
        global_anchor_info = {'decode_fn': lambda pred : anchor_encoder_decoder.batch_decode_anchors(pred, anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax),
                            'num_anchors_per_layer': num_anchors_per_layer,
                            'all_num_anchors_depth': anchors_depth }

        return image, {'filename': filename, 'shape': shape, 'loc_targets': loc_targets, 'cls_targets': cls_targets, 'match_scores': match_scores}
Example No. 5
    def input_fn():
        out_shape = [FLAGS.train_image_size] * 2
        ssd300_anchor_params = {'layers_shapes': [(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)], 'anchor_scales': [(0.1,), (0.2,), (0.375,), (0.55,), (0.725,), (0.9,)],
                                                    'extra_anchor_scales': [(0.1414,), (0.2739,), (0.4541,), (0.6315,), (0.8078,), (0.9836,)],
                                                    'anchor_ratios': [(1., 2., .5), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., .5), (1., 2., .5)],
                                                    'layer_steps': [8, 16, 32, 64, 100, 300]}
        ssd512_anchor_params = {'layers_shapes': [(64, 64), (32, 32), (16, 16), (8, 8), (4, 4), (2, 2), (1, 1)],
                                                    'anchor_scales': [(0.07,), (0.15,), (0.3,), (0.45,), (0.6,), (0.75,), (0.9,)],
                                                    'extra_anchor_scales': [(0.1025,), (0.2121,), (0.3674,), (0.5196,), (0.6708,), (0.8216,), (0.9721,)],
                                                    'anchor_ratios': [(1., 2., .5), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., .5), (1., 2., .5)],
                                                    'layer_steps': [8, 16, 32, 64, 128, 256, 512]}
        if FLAGS.train_image_size == 512:
            net_params = ssd512_anchor_params
            print('using ssd512 model')
        else:
            net_params = ssd300_anchor_params
            print('using ssd300 model')
        anchor_creator = anchor_manipulator.AnchorCreator(out_shape, **net_params)
        all_anchors, all_num_anchors_depth, all_num_anchors_spatial = anchor_creator.get_all_anchors()

        num_anchors_per_layer = []
        for ind in range(len(all_anchors)):
            num_anchors_per_layer.append(all_num_anchors_depth[ind] * all_num_anchors_spatial[ind])

        anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(allowed_borders = [1.0] * len(net_params['layer_steps']),
                                                            positive_threshold = FLAGS.match_threshold,
                                                            ignore_threshold = FLAGS.neg_threshold,
                                                            prior_scaling=[0.1, 0.1, 0.2, 0.2])

        image_preprocessing_fn = lambda image_, labels_, bboxes_ : ssd_preprocessing.preprocess_image(image_, labels_, bboxes_, out_shape, is_training=is_training, data_format=FLAGS.data_format, output_rgb=False)
        anchor_encoder_fn = lambda glabels_, gbboxes_: anchor_encoder_decoder.encode_all_anchors(glabels_, gbboxes_, all_anchors, all_num_anchors_depth, all_num_anchors_spatial)

        image, filename, shape, loc_targets, cls_targets, match_scores = dataset_common.slim_get_batch(FLAGS.num_classes,
                                                                                batch_size,
                                                                                ('train' if is_training else 'val'),
                                                                                os.path.join(FLAGS.data_dir, dataset_pattern),
                                                                                FLAGS.num_readers,
                                                                                FLAGS.num_preprocessing_threads,
                                                                                image_preprocessing_fn,
                                                                                anchor_encoder_fn,
                                                                                num_epochs=FLAGS.train_epochs,
                                                                                is_training=is_training)
        global global_anchor_info
        global_anchor_info = {'decode_fn': lambda pred : anchor_encoder_decoder.decode_all_anchors(pred, num_anchors_per_layer),
                            'num_anchors_per_layer': num_anchors_per_layer,
                            'all_num_anchors_depth': all_num_anchors_depth }

        return {'image': image, 'filename': filename, 'shape': shape, 'loc_targets': loc_targets, 'cls_targets': cls_targets, 'match_scores': match_scores}, None
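A small sketch computing the total anchor count for either parameter set, assuming the same depth convention as the SSD paper (one anchor per scale/ratio pair plus one per extra scale); lift the two dicts above out of input_fn to run it standalone:

def total_anchors(params):
    total = 0
    for l, (h, w) in enumerate(params['layers_shapes']):
        depth = (len(params['anchor_scales'][l]) * len(params['anchor_ratios'][l])
                 + len(params['extra_anchor_scales'][l]))
        total += depth * h * w
    return total

print(total_anchors(ssd300_anchor_params))  # 8732
print(total_anchors(ssd512_anchor_params))  # 24564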
Example No. 6
 def input_fn():
     assert batch_size == 1, 'Only a batch size of 1 is supported during evaluation.'
     target_shape = [FLAGS.train_image_size] * 2
     image_preprocessing_fn = lambda image_, label_: cls_preprocessing.preprocess_image(
         image_, label_, target_shape,
         is_training=is_training, data_format=FLAGS.data_format,
         output_rgb=False)
     image, filename, label, points, is_reg = dataset_common.slim_get_batch(
                                                         FLAGS.num_classes,
                                                         batch_size,
                                                         ('train' if is_training else 'val'),
                                                         os.path.join(FLAGS.data_dir, dataset_pattern),
                                                         FLAGS.num_readers,
                                                         FLAGS.num_preprocessing_threads,
                                                         image_preprocessing_fn,
                                                         num_epochs=1,
                                                         is_training=is_training)
     return {'image': image, 'filename': filename, 'label': label, 'points': points, 'is_reg': is_reg}, None
Example No. 7
    def input_fn():
        out_shape = [FLAGS.train_image_size] * 2
        anchor_creator = anchor_manipulator.AnchorCreator(out_shape,
                                                    layers_shapes = [(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)],
                                                    anchor_scales = [(0.1,), (0.2,), (0.375,), (0.55,), (0.725,), (0.9,)],
                                                    extra_anchor_scales = [(0.1414,), (0.2739,), (0.4541,), (0.6315,), (0.8078,), (0.9836,)],
                                                    anchor_ratios = [(1., 2., .5), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., .5), (1., 2., .5)],
                                                    layer_steps = [8, 16, 32, 64, 100, 300])
        all_anchors, all_num_anchors_depth, all_num_anchors_spatial = anchor_creator.get_all_anchors()
        # all_anchors: [[(38x38x1), (38x38x1), (4x1), (4x1)], [(19x19x1), (19x19x1), (4x1), (4x1)], ...] -> records all the anchor information (per-layer center grids and per-depth sizes)

        num_anchors_per_layer = []
        for ind in range(len(all_anchors)):
            num_anchors_per_layer.append(all_num_anchors_depth[ind] * all_num_anchors_spatial[ind])

        anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(allowed_borders = [1.0] * 6,
                                                            positive_threshold = FLAGS.match_threshold,
                                                            ignore_threshold = FLAGS.neg_threshold,
                                                            prior_scaling=[0.1, 0.1, 0.2, 0.2])

        image_preprocessing_fn = lambda image_, labels_, bboxes_ : ssd_preprocessing.preprocess_image(image_, labels_, bboxes_, out_shape, is_training=is_training, data_format=FLAGS.data_format, output_rgb=False)
        
        anchor_encoder_fn = lambda glabels_, gbboxes_: anchor_encoder_decoder.encode_all_anchors(glabels_, gbboxes_, all_anchors, all_num_anchors_depth, all_num_anchors_spatial)
        
        anchor_decoder_fn = lambda pred : anchor_encoder_decoder.decode_all_anchors(pred, num_anchors_per_layer)

        image, _, shape, loc_targets, cls_targets, match_scores = dataset_common.slim_get_batch(FLAGS.num_classes,
                                                                                batch_size,
                                                                                ('train' if is_training else 'val'),
                                                                                os.path.join(FLAGS.data_dir, dataset_pattern),
                                                                                FLAGS.num_readers,
                                                                                FLAGS.num_preprocessing_threads,
                                                                                image_preprocessing_fn,
                                                                                anchor_encoder_fn,
                                                                                num_epochs=FLAGS.train_epochs,
                                                                                is_training=is_training)
        global global_anchor_info
        global_anchor_info = {'decode_fn': anchor_decoder_fn,
                            'num_anchors_per_layer': num_anchors_per_layer,
                            'all_num_anchors_depth': all_num_anchors_depth }

        return image, {'shape': shape, 'loc_targets': loc_targets, 'cls_targets': cls_targets, 'match_scores': match_scores}
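global_anchor_info acts as a side channel between input_fn and the model function. A minimal sketch (names hypothetical) of how it is typically read back when turning raw localization predictions into boxes:

def decode_predictions(loc_pred):
    # decode_fn maps per-anchor offset predictions back to
    # (ymin, xmin, ymax, xmax) coordinates using the stored anchors.
    decode_fn = global_anchor_info['decode_fn']
    return decode_fn(loc_pred)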
Example No. 8
 def input_fn():
     out_shape = [args.train_image_size] * 2
     anchor_creator = anchor_manipulator.AnchorCreator(
         out_shape,
         layers_shapes=[(50, 50), (25, 25), (13, 13), (7, 7), (3, 3),
                        (3, 3)],
         anchor_scales=[(0.1, ), (0.2, ), (0.375, ), (0.55, ), (0.725, ),
                        (0.9, )],
         extra_anchor_scales=[(0.1414, ), (0.2739, ), (0.4541, ),
                              (0.6315, ), (0.8078, ), (0.9836, )],
         anchor_ratios=[(1., 2., .5), (1., 2., 3., .5, 0.3333),
                        (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333),
                        (1., 2., .5), (1., 2., .5)],
         layer_steps=[24, 48, 92, 171, 400, 400])
     all_anchors, all_num_anchors_depth, all_num_anchors_spatial = anchor_creator.get_all_anchors(
     )
     num_anchors_per_layer = []
     for ind in range(len(all_anchors)):
         num_anchors_per_layer.append(all_num_anchors_depth[ind] *
                                      all_num_anchors_spatial[ind])
     anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(
         allowed_borders=[1.0] * 6,
         positive_threshold=args.match_threshold,
         ignore_threshold=args.neg_threshold,
         prior_scaling=[0.1, 0.1, 0.2, 0.2])
     image_preprocessing_fn = lambda image_, labels_, bboxes_: ssd_preprocessing.preprocess_image(
         image_,
         labels_,
         bboxes_,
         out_shape,
         is_training=is_training,
         data_format=args.data_format,
         output_rgb=True)
     anchor_encoder_fn = lambda glabels_, gbboxes_: anchor_encoder_decoder.encode_all_anchors(
         glabels_, gbboxes_, all_anchors, all_num_anchors_depth,
         all_num_anchors_spatial)
     image, filename, shape, loc_targets, cls_targets, match_scores = dataset_common.slim_get_batch(
         args.num_classes,
         batch_size, ('train' if is_training else 'val'),
         os.path.join(args.data_dir, dataset_pattern),
         args.num_readers,
         args.num_preprocessing_threads_mine,
         image_preprocessing_fn,
         anchor_encoder_fn,
         num_epochs=args.train_epochs,
         is_training=is_training)
     global global_anchor_info
     global_anchor_info = {
         'decode_fn':
         lambda pred: anchor_encoder_decoder.decode_all_anchors(
             pred, num_anchors_per_layer),
         'num_anchors_per_layer':
         num_anchors_per_layer,
         'all_num_anchors_depth':
         all_num_anchors_depth
     }
     return {
         'image': image,
         'filename': filename,
         'shape': shape,
         'loc_targets': loc_targets,
         'cls_targets': cls_targets,
         'match_scores': match_scores
     }, None
Example No. 9
    def input_fn():
        target_shape = [FLAGS.train_image_size] * 2

        anchor_processor =\
            anchor_manipulator.AnchorProcessor(
                positive_threshold=FLAGS.match_threshold,
                ignore_threshold=FLAGS.neg_threshold,
                prior_scaling=config.PRIOR_SCALING)
        # anchor_processor: Python object

        anchor_heights_all_layers,\
            anchor_widths_all_layers,\
            num_anchors_per_location_all_layers =\
            anchor_processor.get_anchors_size_all_layers(
                config.ALL_ANCHOR_SCALES,
                config.ALL_EXTRA_SCALES,
                config.ALL_ANCHOR_RATIOS,
                config.NUM_FEATURE_LAYERS)
        # anchor_heights_all_layers: [1d-tf.constant tf.float32,
        #                           1d-tf.constant tf.float32,
        #                           ...]
        # anchor_widths_all_layers: [1d-tf.constant tf.float32,
        #                           1d-tf.constant tf.float32,
        #                           ...]
        # num_anchors_per_location_all_layers: [Python int, Python int, ...]

        anchors_ymin,\
            anchors_xmin,\
            anchors_ymax,\
            anchors_xmax,\
            inside_mask =\
            anchor_processor.get_all_anchors_all_layers(
                target_shape,
                anchor_heights_all_layers,
                anchor_widths_all_layers,
                num_anchors_per_location_all_layers,
                config.ANCHOR_OFFSETS,
                config.VERTICAL_OFFSETS,
                config.ALL_LAYER_SHAPES,
                config.ALL_LAYER_STRIDES,
                [FLAGS.train_image_size * 1.] * config.NUM_FEATURE_LAYERS,
                [False] * config.NUM_FEATURE_LAYERS)
        # anchors_ymin: 1d-tf.Tensor(num_anchors_all_layers) tf.float32
        # inside_mask: 1d-tf.Tensor(num_anchors_all_layers) tf.bool

        num_anchors_per_layer = []
        for ind, layer_shape in enumerate(config.ALL_LAYER_SHAPES):
            _, _num_anchors_per_layer =\
                anchor_processor.count_num_anchors_per_layer(
                    num_anchors_per_location_all_layers[ind],
                    layer_shape,
                    name='count_num_anchors_per_layer_{}'.format(ind))
            num_anchors_per_layer.append(_num_anchors_per_layer)
        # num_anchors_per_layer = [num_anchors_layer1, num_anchors_layer2, ...]
        # e.g., num_anchors_per_layer = [48 x 48 x 2 x 10, ...]

        def image_preprocessing_fn(image_, labels_, bboxes_, quadrilaterals_):
            return textboxes_plusplus_preprocessing.preprocess_image(
                image_,
                labels_,
                bboxes_,
                quadrilaterals_,
                target_shape,
                is_training=is_training,
                data_format=FLAGS.data_format,
                output_rgb=False)

        def anchor_encoder_fn(glabels_, gbboxes_, gquadrilaterals_):
            return anchor_processor.encode_anchors(
                glabels_,
                gbboxes_,
                gquadrilaterals_,
                anchors_ymin,
                anchors_xmin,
                anchors_ymax,
                anchors_xmax,
                inside_mask)
        image, _, shape, loc_targets, cls_targets, match_scores =\
            dataset_common.slim_get_batch(
                FLAGS.num_classes,
                batch_size,
                ('train' if is_training else 'val'),
                os.path.join(FLAGS.data_dir, dataset_pattern),
                FLAGS.num_readers,
                FLAGS.num_preprocessing_threads,
                image_preprocessing_fn,
                anchor_encoder_fn,
                num_epochs=FLAGS.train_epochs,
                is_training=is_training)

        global global_anchor_info
        global_anchor_info =\
            {'decode_fn':
             lambda pred: anchor_processor.batch_decode_anchors(
                pred,
                anchors_ymin,
                anchors_xmin,
                anchors_ymax,
                anchors_xmax),
             'num_anchors_per_layer': num_anchors_per_layer,
             'num_anchors_per_location_all_layers':
                num_anchors_per_location_all_layers}

        return image,\
            {'shape': shape,  # original shape from .tfrecord files
             'loc_targets': loc_targets,  # [bs, n_anchors, 12]
             'cls_targets': cls_targets,  # [bs, n_anchors]
             'match_scores': match_scores  # [bs, n_anchors]
             }
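The 12 localization values per anchor are consistent with TextBoxes++ regressing a 4-value axis-aligned box together with an 8-value quadrilateral. A sketch of splitting them downstream; the 4-then-8 ordering is an assumption, not confirmed by this example:

import tensorflow as tf

# loc_targets comes from the input_fn above: [bs, n_anchors, 12].
bbox_targets, quad_targets = tf.split(loc_targets, [4, 8], axis=-1)
# bbox_targets: [bs, n_anchors, 4]; quad_targets: [bs, n_anchors, 8]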
Example No. 10
    def input_fn():
        out_shape = [300, 510]  #[FLAGS.train_image_size] * 2
        anchor_creator = anchor_manipulator.AnchorCreator(
            out_shape,
            layers_shapes=[(38, 64), (19, 32), (10, 16), (5, 8), (3, 6),
                           (1, 4)],
            anchor_scales=[(0.05, ), (0.1, ), (0.2, ), (0.3, ), (0.4, ),
                           (0.5, )],
            extra_anchor_scales=[(0.07, ), (0.1414, ), (0.245, ), (0.346, ),
                                 (0.447, ), (0.547, )],
            anchor_ratios=[(1., ), (1., ), (1., ), (1., ), (1., ), (1., )],
            #anchor_ratios = [(2., .5), (2., 3., .5, 0.3333), (2., 3., .5, 0.3333), (2., 3., .5, 0.3333), (2., .5), (2., .5)],
            layer_steps=[8, 16, 32, 64, 100, 300])
        all_anchors, all_num_anchors_depth, all_num_anchors_spatial = anchor_creator.get_all_anchors(
        )

        num_anchors_per_layer = []
        for ind in range(len(all_anchors)):
            num_anchors_per_layer.append(all_num_anchors_depth[ind] *
                                         all_num_anchors_spatial[ind])

        anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(
            allowed_borders=[1.0] * 6,
            positive_threshold=FLAGS.match_threshold,
            ignore_threshold=FLAGS.neg_threshold,
            prior_scaling=[0.1, 0.1, 0.2, 0.2])

        image_preprocessing_fn = lambda image_, labels_, bboxes_: ssd_preprocessing.preprocess_image(
            image_,
            labels_,
            bboxes_,
            out_shape,
            is_training=is_training,
            data_format=FLAGS.data_format,
            output_rgb=False)
        anchor_encoder_fn = lambda glabels_, gbboxes_: anchor_encoder_decoder.encode_all_anchors(
            glabels_, gbboxes_, all_anchors, all_num_anchors_depth,
            all_num_anchors_spatial)

        image, filename, shape, loc_targets, cls_targets, match_scores = dataset_common.slim_get_batch(
            FLAGS.num_classes,
            batch_size, ('train' if is_training else 'val'),
            os.path.join(FLAGS.data_dir, dataset_pattern),
            FLAGS.num_readers,
            FLAGS.num_preprocessing_threads,
            image_preprocessing_fn,
            anchor_encoder_fn,
            num_epochs=FLAGS.train_epochs,
            is_training=is_training)
        global global_anchor_info
        global_anchor_info = {
            'decode_fn':
            lambda pred: anchor_encoder_decoder.decode_all_anchors(
                pred, num_anchors_per_layer),
            'num_anchors_per_layer':
            num_anchors_per_layer,
            'all_num_anchors_depth':
            all_num_anchors_depth
        }

        return {
            'image': image,
            'filename': filename,
            'shape': shape,
            'loc_targets': loc_targets,
            'cls_targets': cls_targets,
            'match_scores': match_scores
        }, None
Example No. 11
    def input_fn():
        target_shape = [FLAGS.train_image_size] * 2

        anchor_processor =\
            anchor_manipulator.AnchorProcessor(
                positive_threshold=FLAGS.match_threshold,
                ignore_threshold=FLAGS.neg_threshold,
                prior_scaling=config.PRIOR_SCALING)

        anchor_heights_all_layers,\
            anchor_widths_all_layers,\
            num_anchors_per_location_all_layers =\
            anchor_processor.get_anchors_size_all_layers(
                config.ALL_ANCHOR_SCALES,
                config.ALL_EXTRA_SCALES,
                config.ALL_ANCHOR_RATIOS,
                config.NUM_FEATURE_LAYERS)

        # shape = (num_anchors_all_layers,).
        anchors_ymin,\
            anchors_xmin,\
            anchors_ymax,\
            anchors_xmax,\
            inside_mask =\
            anchor_processor.get_all_anchors_all_layers(
                target_shape,
                anchor_heights_all_layers,
                anchor_widths_all_layers,
                num_anchors_per_location_all_layers,
                config.ANCHOR_OFFSETS,
                config.VERTICAL_OFFSETS,
                config.ALL_LAYER_SHAPES,
                config.ALL_LAYER_STRIDES,
                [FLAGS.train_image_size * 1.] * config.NUM_FEATURE_LAYERS,
                [False] * config.NUM_FEATURE_LAYERS)

        num_anchors_per_layer = []
        for ind, layer_shape in enumerate(config.ALL_LAYER_SHAPES):
            _, _num_anchors_per_layer =\
                anchor_processor.count_num_anchors_per_layer(
                    num_anchors_per_location_all_layers[ind],
                    layer_shape,
                    name='count_num_anchors_per_layer_{}'.format(ind))
            num_anchors_per_layer.append(_num_anchors_per_layer)

        def image_preprocessing_fn(image_, labels_, bboxes_, quadrilaterals_):
            return textboxes_plusplus_preprocessing.preprocess_image(
                image_,
                labels_,
                bboxes_,
                quadrilaterals_,
                target_shape,
                is_training=is_training,
                data_format=FLAGS.data_format,
                output_rgb=False)

        def anchor_encoder_fn(glabels_, gbboxes_, gquadrilaterals_):
            return anchor_processor.encode_anchors(
                glabels_,
                gbboxes_,
                gquadrilaterals_,
                anchors_ymin,
                anchors_xmin,
                anchors_ymax,
                anchors_xmax,
                inside_mask)

        image, _, shape, loc_targets, cls_targets, match_scores =\
            dataset_common.slim_get_batch(
                FLAGS.num_classes,
                batch_size,
                (dataset_pattern[:-2]),
                os.path.join(FLAGS.data_dir, dataset_pattern),
                FLAGS.num_readers,
                FLAGS.num_preprocessing_threads,
                image_preprocessing_fn,
                anchor_encoder_fn,
                num_epochs=FLAGS.train_epochs,
                is_training=is_training)

        global global_anchor_info
        global_anchor_info =\
            {'decode_fn':
             lambda pred: anchor_processor.batch_decode_anchors(
                pred,
                anchors_ymin,
                anchors_xmin,
                anchors_ymax,
                anchors_xmax),
             'num_anchors_per_layer': num_anchors_per_layer,
             'num_anchors_per_location_all_layers':
                num_anchors_per_location_all_layers}

        return image,\
            {'shape': shape,
             'loc_targets': loc_targets,
             'cls_targets': cls_targets,
             'match_scores': match_scores
             }
Example No. 12
    def input_fn():
        # train_image_size = 300 -> target_shape = [300, 300]
        target_shape = [FLAGS.train_image_size] * 2

        # match_threshold: 0.5
        # neg_threshold: 0.5
        anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(
            positive_threshold=FLAGS.match_threshold,
            ignore_threshold=FLAGS.neg_threshold,
            prior_scaling=[0.1, 0.1, 0.2, 0.2])

        all_anchor_scales = [(30., ), (60., ), (112.5, ), (165., ), (217.5, ),
                             (270., )]
        all_extra_scales = [(42.43, ), (82.17, ), (136.23, ), (189.45, ),
                            (242.34, ), (295.08, )]
        all_anchor_ratios = [(1., 2., .5), (1., 2., 3., .5, 0.3333),
                             (1., 2., 3., .5, 0.3333),
                             (1., 2., 3., .5, 0.3333), (1., 2., .5),
                             (1., 2., .5)]
        all_layer_shapes = [(38, 38), (19, 19), (10, 10), (5, 5), (3, 3),
                            (1, 1)]
        all_layer_strides = [8, 16, 32, 64, 100, 300]
        total_layers = len(all_layer_shapes)
        anchors_height = list()
        anchors_width = list()
        anchors_depth = list()
        for ind in range(total_layers):
            # If this layer has n default prior boxes, anchors_height holds the heights of those boxes and _anchor_depth is n.
            _anchors_height, _anchors_width, _anchor_depth = anchor_encoder_decoder.get_anchors_width_height(
                all_anchor_scales[ind],
                all_extra_scales[ind],
                all_anchor_ratios[ind],
                name='get_anchors_width_height{}'.format(ind))
            anchors_height.append(_anchors_height)
            anchors_width.append(_anchors_width)
            anchors_depth.append(_anchor_depth)
        # anchors_ymin length: 38*38*4 + 19*19*6 + 10*10*6 + 5*5*6 + 3*3*4 + 1*1*4
        anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax, inside_mask = anchor_encoder_decoder.get_all_anchors(
            target_shape, anchors_height, anchors_width, anchors_depth,
            [0.5] * total_layers, all_layer_shapes, all_layer_strides,
            [FLAGS.train_image_size * 1.] * total_layers,
            [False] * total_layers)

        num_anchors_per_layer = list()
        #all_layer_shapes [(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)]
        for ind, layer_shape in enumerate(all_layer_shapes):
            # num_anchors_per_layer: layer_shape[0] * layer_shape[1] * anchors_depth
            _, _num_anchors_per_layer = anchor_encoder_decoder.get_anchors_count(
                anchors_depth[ind],
                layer_shape,
                name='get_anchor_count{}'.format(ind))
            num_anchors_per_layer.append(_num_anchors_per_layer)
        # num_anchors_per_layer: [38*38*4, 19*19*6, 10*10*6, 5*5*6, 3*3*4, 1*1*4]
        image_preprocessing_fn = lambda image_, labels_, bboxes_: ssd_preprocessing.preprocess_image(
            image_,
            labels_,
            bboxes_,
            target_shape,
            is_training=is_training,
            data_format=FLAGS.data_format,
            output_rgb=False)
        anchor_encoder_fn = lambda glabels_, gbboxes_: anchor_encoder_decoder.encode_anchors(
            glabels_, gbboxes_, anchors_ymin, anchors_xmin, anchors_ymax,
            anchors_xmax, inside_mask)

        image, _, shape, loc_targets, cls_targets, match_scores = dataset_common.slim_get_batch(
            FLAGS.num_classes,
            batch_size, ('train' if is_training else 'val'),
            os.path.join(FLAGS.data_dir, dataset_pattern),
            FLAGS.num_readers,
            FLAGS.num_preprocessing_threads,
            image_preprocessing_fn,
            anchor_encoder_fn,
            num_epochs=FLAGS.train_epochs,
            is_training=is_training)
        global global_anchor_info
        global_anchor_info = {
            'decode_fn':
            lambda pred: anchor_encoder_decoder.batch_decode_anchors(
                pred, anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax),
            'num_anchors_per_layer':
            num_anchors_per_layer,
            'all_num_anchors_depth':
            anchors_depth
        }

        return image, {
            'shape': shape,
            'loc_targets': loc_targets,
            'cls_targets': cls_targets,
            'match_scores': match_scores
        }