Example #1
def save_image_with_bbox(image, labels_, scores_, bboxes_):
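    """Draw `bboxes_` on a copy of `image` and save it under an auto-incremented index.

    A function attribute is used as a persistent counter across calls, so each
    saved file in ./debug/ gets a unique name.
    """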
    if not hasattr(save_image_with_bbox, "counter"):
        save_image_with_bbox.counter = 0  # it doesn't exist yet, so initialize it
    save_image_with_bbox.counter += 1

    img_to_draw = np.copy(image).astype(np.uint8)

    img_to_draw = draw_toolbox.bboxes_draw_on_img(img_to_draw, labels_, scores_, bboxes_, thickness=2)
    imsave(os.path.join('./debug', '{}.jpg'.format(save_image_with_bbox.counter)), img_to_draw)
    return save_image_with_bbox.counter
Example #2
def save_image_with_bbox(image, labels_, scores_, bboxes_):
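    """Same as Example #1, but only draws and writes to disk when the module-level `_IN_DEBUG` flag is set."""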
    if not hasattr(save_image_with_bbox, "counter"):
        save_image_with_bbox.counter = 0  # it doesn't exist yet, so initialize it
    save_image_with_bbox.counter += 1

    img_to_draw = np.copy(image)
    if _IN_DEBUG:
        img_to_draw = draw_toolbox.bboxes_draw_on_img(img_to_draw, labels_, scores_, bboxes_, thickness=2)
        imsave(os.path.join('./Debug', '{}.jpg'.format(save_image_with_bbox.counter)), img_to_draw)
    return save_image_with_bbox.counter
Example #3
def save_image_with_bbox(image, labels_, scores_, bboxes_):
    # Draw bounding boxes on the image
    if not hasattr(save_image_with_bbox, "counter"):
        save_image_with_bbox.counter = 0  # initialize it if it does not exist yet
    save_image_with_bbox.counter += 1
    img_to_draw = np.copy(image)
    img_to_draw = draw_toolbox.bboxes_draw_on_img(img_to_draw,
                                                  labels_,
                                                  scores_,
                                                  bboxes_,
                                                  thickness=2)  # draw the boxes
    imsave(
        os.path.join('./debug', '{}.jpg'.format(save_image_with_bbox.counter)),
        img_to_draw)
    return save_image_with_bbox.counter
Example #4
def save_image_with_bbox(image, labels_, scores_, bboxes_):
    # Save the image with bounding boxes drawn on it
    if not hasattr(save_image_with_bbox, "counter"):
        save_image_with_bbox.counter = 0  # initialize if missing
    save_image_with_bbox.counter += 1
    img_to_draw = np.copy(image)
    img_to_draw = draw_toolbox.bboxes_draw_on_img(img_to_draw,
                                                  labels_,
                                                  scores_,
                                                  bboxes_,
                                                  thickness=2)
    imsave(
        os.path.join('/home/yhq/Desktop/SSD-short/dataset/debug',
                     '{}.jpg'.format(save_image_with_bbox.counter)),
        img_to_draw)
    return save_image_with_bbox.counter
Example #5
def main(_):
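    """Build the detection graph, restore a checkpoint, then run inference on every *.jpg under FLAGS.test_dataset_path and save annotated copies to FLAGS.debug_dir."""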
    with tf.Graph().as_default():
        out_shape = [FLAGS.train_image_size] * 2

        image_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
        shape_input = tf.placeholder(tf.int32, shape=(2, ))

        features = common_preprocessing.preprocess_for_test(
            image_input,
            out_shape,
            data_format=('NCHW'
                         if FLAGS.data_format == 'channels_first' else 'NHWC'))

        features = tf.expand_dims(features, axis=0)

        anchor_creator = anchor_manipulator_v2.AnchorCreator(
            out_shape,
            layers_shapes=[(24, 24), (12, 12), (6, 6)],
            anchor_scales=[(0.1, ), (0.2, 0.375, 0.55), (0.725, 0.9)],
            extra_anchor_scales=[(0.1414, ), (0.2739, 0.4541, 0.6315),
                                 (0.8078, 0.9836)],
            anchor_ratios=[(2., .5), (2., 3., .5, 0.3333), (2., .5)],
            layer_steps=[16, 32, 64])
        all_anchors, all_num_anchors_depth, all_num_anchors_spatial = anchor_creator.get_all_anchors()

        anchor_encoder_decoder = anchor_manipulator_v2.AnchorEncoder(
            allowed_borders=[1.0] * 6,
            positive_threshold=None,
            ignore_threshold=None,
            prior_scaling=[0.1, 0.1, 0.2, 0.2])

        def decode_fn(pred):
            return anchor_encoder_decoder.ext_decode_all_anchors(
                pred, all_anchors, all_num_anchors_depth, all_num_anchors_spatial)

        with tf.variable_scope(FLAGS.model_scope,
                               default_name=None,
                               values=[features],
                               reuse=tf.AUTO_REUSE):
            with tf.device('/gpu:0'):
                backbone = xdet_body_v4.xdet_resnet_v4(FLAGS.resnet_size,
                                                       FLAGS.data_format)
                backbone_outputs = backbone(inputs=features, is_training=False)

                cls_pred, location_pred = xdet_body_v4.xdet_head(
                    backbone_outputs,
                    FLAGS.num_classes,
                    all_num_anchors_depth,
                    False,
                    data_format=FLAGS.data_format)

                if FLAGS.data_format == 'channels_first':
                    cls_pred = [
                        tf.transpose(pred, [0, 2, 3, 1]) for pred in cls_pred
                    ]
                    location_pred = [
                        tf.transpose(pred, [0, 2, 3, 1])
                        for pred in location_pred
                    ]

                cls_pred = [
                    tf.reshape(pred, [-1, FLAGS.num_classes])
                    for pred in cls_pred
                ]
                location_pred = [
                    tf.reshape(pred, [-1, 4]) for pred in location_pred
                ]

                cls_pred = tf.concat(cls_pred, axis=0)
                location_pred = tf.concat(location_pred, axis=0)

        bboxes_pred = decode_fn(location_pred)
        bboxes_pred = tf.concat(bboxes_pred, axis=0)
        selected_bboxes, selected_scores = parse_by_class(
            cls_pred, bboxes_pred, FLAGS.num_classes, FLAGS.select_threshold,
            FLAGS.min_size, FLAGS.keep_topk, FLAGS.nms_topk,
            FLAGS.nms_threshold)

        labels_list = []
        scores_list = []
        bboxes_list = []
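        # Flatten the per-class outputs of parse_by_class into single label/score/box tensors for drawing.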
        for k, v in selected_scores.items():
            labels_list.append(tf.ones_like(v, tf.int32) * k)
            scores_list.append(v)
            bboxes_list.append(selected_bboxes[k])
        all_labels = tf.concat(labels_list, axis=0)
        all_scores = tf.concat(scores_list, axis=0)
        all_bboxes = tf.concat(bboxes_list, axis=0)

        summary_dir = os.path.join(FLAGS.model_dir, 'predict')
        if not os.path.exists(summary_dir):
            os.makedirs(summary_dir)

        all_images = tf.gfile.Glob(
            os.path.join(FLAGS.test_dataset_path, '*.jpg'))
        len_images = len(all_images)
        config = tf.ConfigProto(allow_soft_placement=True,
                                log_device_placement=False)

        saver = tf.train.Saver()
        with tf.Session(config=config) as sess:
            init = tf.global_variables_initializer()
            sess.run(init)
            saver.restore(sess, get_checkpoint())

            for ind, image_name in enumerate(all_images):
                sys.stdout.write('\r>> Processing image %d/%d' %
                                 (ind + 1, len_images))
                sys.stdout.flush()

                np_image = imread(image_name)

                labels_, scores_, bboxes_ = sess.run(
                    [all_labels, all_scores, all_bboxes],
                    feed_dict={
                        image_input: np_image,
                        shape_input: np_image.shape[:-1]
                    })

                img_to_draw = draw_toolbox.bboxes_draw_on_img(np_image,
                                                              labels_,
                                                              scores_,
                                                              bboxes_,
                                                              thickness=2)
                imsave(
                    os.path.join(FLAGS.debug_dir,
                                 'output_{}.jpg'.format(image_name[-10:-4])),
                    img_to_draw)

            sys.stdout.write('\n')
            sys.stdout.flush()
Example #6
def main(_):
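    """Single-image demo: restore an SSD300-VGG16 checkpoint, detect on ./demo/test.jpg, and save the annotated result to ./demo/test_out.jpg."""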
    with tf.Graph().as_default():
        out_shape = [FLAGS.train_image_size] * 2

        image_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
        shape_input = tf.placeholder(tf.int32, shape=(2, ))

        features = ssd_preprocessing.preprocess_for_eval(
            image_input,
            out_shape,
            data_format=FLAGS.data_format,
            output_rgb=False)
        features = tf.expand_dims(features, axis=0)

        anchor_creator = anchor_manipulator.AnchorCreator(
            out_shape,
            layers_shapes=[(38, 38), (19, 19), (10, 10), (5, 5), (3, 3),
                           (1, 1)],
            anchor_scales=[(0.1, ), (0.2, ), (0.375, ), (0.55, ), (0.725, ),
                           (0.9, )],
            extra_anchor_scales=[(0.1414, ), (0.2739, ), (0.4541, ),
                                 (0.6315, ), (0.8078, ), (0.9836, )],
            anchor_ratios=[(2., .5),
                           (2., 3., .5, 0.3333), (2., 3., .5, 0.3333),
                           (2., 3., .5, 0.3333), (2., .5), (2., .5)],
            layer_steps=[8, 16, 32, 64, 100, 300])
        all_anchors, all_num_anchors_depth, all_num_anchors_spatial = anchor_creator.get_all_anchors()

        anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(
            allowed_borders=[1.0] * 6,
            positive_threshold=None,
            ignore_threshold=None,
            prior_scaling=[0.1, 0.1, 0.2, 0.2])

        def decode_fn(pred):
            return anchor_encoder_decoder.ext_decode_all_anchors(
                pred, all_anchors, all_num_anchors_depth, all_num_anchors_spatial)

        with tf.variable_scope(FLAGS.model_scope,
                               default_name=None,
                               values=[features],
                               reuse=tf.AUTO_REUSE):
            backbone = ssd_net.VGG16Backbone(FLAGS.data_format)
            feature_layers = backbone.forward(features, training=False)
            location_pred, cls_pred = ssd_net.multibox_head(
                feature_layers,
                FLAGS.num_classes,
                all_num_anchors_depth,
                data_format=FLAGS.data_format)
            if FLAGS.data_format == 'channels_first':
                cls_pred = [
                    tf.transpose(pred, [0, 2, 3, 1]) for pred in cls_pred
                ]
                location_pred = [
                    tf.transpose(pred, [0, 2, 3, 1]) for pred in location_pred
                ]

            cls_pred = [
                tf.reshape(pred, [-1, FLAGS.num_classes]) for pred in cls_pred
            ]
            location_pred = [
                tf.reshape(pred, [-1, 4]) for pred in location_pred
            ]

            cls_pred = tf.concat(cls_pred, axis=0)
            location_pred = tf.concat(location_pred, axis=0)

        with tf.device('/cpu:0'):
            bboxes_pred = decode_fn(location_pred)
            bboxes_pred = tf.concat(bboxes_pred, axis=0)
            selected_bboxes, selected_scores = parse_by_class(
                cls_pred, bboxes_pred, FLAGS.num_classes,
                FLAGS.select_threshold, FLAGS.min_size, FLAGS.keep_topk,
                FLAGS.nms_topk, FLAGS.nms_threshold)

            labels_list = []
            scores_list = []
            bboxes_list = []
            for k, v in selected_scores.items():
                labels_list.append(tf.ones_like(v, tf.int32) * k)
                scores_list.append(v)
                bboxes_list.append(selected_bboxes[k])
            all_labels = tf.concat(labels_list, axis=0)
            all_scores = tf.concat(scores_list, axis=0)
            all_bboxes = tf.concat(bboxes_list, axis=0)

        saver = tf.train.Saver()
        with tf.Session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)

            saver.restore(sess, get_checkpoint())

            np_image = imread('./demo/test.jpg')
            labels_, scores_, bboxes_ = sess.run(
                [all_labels, all_scores, all_bboxes],
                feed_dict={
                    image_input: np_image,
                    shape_input: np_image.shape[:-1]
                })

            img_to_draw = draw_toolbox.bboxes_draw_on_img(np_image,
                                                          labels_,
                                                          scores_,
                                                          bboxes_,
                                                          thickness=2)
            imsave('./demo/test_out.jpg', img_to_draw)
Example #7
def main(_):
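    """Like Example #6, but wraps the inputs and prediction tensors in explicit name/variable scopes and re-exports the restored weights with saver.save."""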
    with tf.Graph().as_default():
        out_shape = [FLAGS.train_image_size] * 2

        with tf.name_scope('define_input'):
            image_input = tf.placeholder(tf.uint8,
                                         shape=(None, None, 3),
                                         name='image_input')

        features = ssd_preprocessing.preprocess_for_eval(
            image_input,
            out_shape,
            data_format=FLAGS.data_format,
            output_rgb=False)
        features = tf.expand_dims(features, axis=0)

        anchor_creator = anchor_manipulator.AnchorCreator(
            out_shape,
            layers_shapes=[(38, 38), (19, 19), (10, 10), (5, 5), (3, 3),
                           (1, 1)],
            anchor_scales=[(0.1, ), (0.2, ), (0.375, ), (0.55, ), (0.725, ),
                           (0.9, )],
            extra_anchor_scales=[(0.1414, ), (0.2739, ), (0.4541, ),
                                 (0.6315, ), (0.8078, ), (0.9836, )],
            anchor_ratios=[(1., 2., .5), (1., 2., 3., .5, 0.3333),
                           (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333),
                           (1., 2., .5), (1., 2., .5)],
            #anchor_ratios = [(2., .5), (2., 3., .5, 0.3333), (2., 3., .5, 0.3333),
            #(2., 3., .5, 0.3333), (2., .5), (2., .5)],
            layer_steps=[8, 16, 32, 64, 100, 300])
        all_anchors, all_num_anchors_depth, all_num_anchors_spatial = anchor_creator.get_all_anchors()

        anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(
            allowed_borders=[1.0] * 6,
            positive_threshold=None,
            ignore_threshold=None,
            prior_scaling=[0.1, 0.1, 0.2, 0.2])

        def decode_fn(pred):
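            """Decode the regressed location offsets back into box coordinates for all anchor layers."""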
            return anchor_encoder_decoder.ext_decode_all_anchors(
                pred, all_anchors, all_num_anchors_depth,
                all_num_anchors_spatial)

        with tf.variable_scope(FLAGS.model_scope,
                               default_name=None,
                               values=[features],
                               reuse=tf.AUTO_REUSE):
            backbone = ssd_net.VGG16Backbone(FLAGS.data_format)
            feature_layers = backbone.forward(features, training=False)
            location_pred, cls_pred = ssd_net.multibox_head(
                feature_layers,
                FLAGS.num_classes,
                all_num_anchors_depth,
                data_format=FLAGS.data_format)
            if FLAGS.data_format == 'channels_first':
                cls_pred = [
                    tf.transpose(pred, [0, 2, 3, 1]) for pred in cls_pred
                ]
                location_pred = [
                    tf.transpose(pred, [0, 2, 3, 1]) for pred in location_pred
                ]

            cls_pred = [
                tf.reshape(pred, [-1, FLAGS.num_classes]) for pred in cls_pred
            ]
            location_pred = [
                tf.reshape(pred, [-1, 4]) for pred in location_pred
            ]

            with tf.variable_scope('cls_pred'):
                cls_pred = tf.concat(cls_pred, axis=0)
            with tf.variable_scope('location_pred'):
                location_pred = tf.concat(location_pred, axis=0)

        with tf.device('/cpu:0'):
            bboxes_pred = decode_fn(location_pred)
            bboxes_pred = tf.concat(bboxes_pred, axis=0)
            selected_bboxes, selected_scores = parse_by_class(
                cls_pred, bboxes_pred, FLAGS.num_classes,
                FLAGS.select_threshold, FLAGS.min_size, FLAGS.keep_topk,
                FLAGS.nms_topk, FLAGS.nms_threshold)

            labels_list = []
            scores_list = []
            bboxes_list = []
            for k, v in selected_scores.items():
                labels_list.append(tf.ones_like(v, tf.int32) * k)
                scores_list.append(v)
                bboxes_list.append(selected_bboxes[k])
            all_labels = tf.concat(labels_list, axis=0)
            all_scores = tf.concat(scores_list, axis=0)
            all_bboxes = tf.concat(bboxes_list, axis=0)

        saver = tf.train.Saver()
        '''
        config = tf.ConfigProto(allow_soft_placement=True, inter_op_parallelism_threads=1, intra_op_parallelism_threads=1)
        config.mlu_options.data_parallelism = 1
        config.mlu_options.model_parallelism = 1
        config.mlu_options.core_num = 1
        config.mlu_options.core_version = 'MLU270'
        config.mlu_options.precision = 'float'
        with tf.Session(config = config) as sess:
        '''
        with tf.Session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)

            saver.restore(sess, get_checkpoint())

            np_image = imread('demo/test.jpg')
            labels_, scores_, bboxes_ = sess.run(
                [all_labels, all_scores, all_bboxes],
                feed_dict={image_input: np_image})

            img_to_draw = draw_toolbox.bboxes_draw_on_img(np_image,
                                                          labels_,
                                                          scores_,
                                                          bboxes_,
                                                          thickness=2)
            imsave('demo/test_out.jpg', img_to_draw)
            saver.save(sess, 'model/ssd300_vgg16/ssd300_vgg16', global_step=0)
Example #8
def main(_):
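    """Video demo on a MobileNetV2 backbone: optionally undistort each fisheye frame, run detection, and display the result with an FPS overlay."""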
    with tf.Graph().as_default():
        out_shape = [FLAGS.train_image_size] * 2

        image_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
        shape_input = tf.placeholder(tf.int32, shape=(2, ))

        features = ssd_preprocessing.preprocess_for_eval(
            image_input,
            out_shape,
            data_format=FLAGS.data_format,
            output_rgb=False)
        features = tf.expand_dims(features, axis=0)  # add the batch dimension

        anchor_creator = anchor_manipulator.AnchorCreator(
            out_shape,
            layers_shapes=[(38, 38), (19, 19), (10, 10), (5, 5), (3, 3),
                           (1, 1)],
            anchor_scales=[(0.1, ), (0.2, ), (0.375, ), (0.55, ), (0.725, ),
                           (0.9, )],
            extra_anchor_scales=[(0.1414, ), (0.2739, ), (0.4541, ),
                                 (0.6315, ), (0.8078, ), (0.9836, )],
            anchor_ratios=[(1., 2., .5), (1., 2., 3., .5, 0.3333),
                           (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333),
                           (1., 2., .5), (1., 2., .5)],
            #anchor_ratios = [(2., .5), (2., 3., .5, 0.3333), (2., 3., .5, 0.3333), (2., 3., .5, 0.3333), (2., .5), (2., .5)],
            layer_steps=[8, 16, 32, 64, 100, 300])
        all_anchors, all_num_anchors_depth, all_num_anchors_spatial = anchor_creator.get_all_anchors()

        anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(
            allowed_borders=[1.0] * 6,
            positive_threshold=None,
            ignore_threshold=None,
            prior_scaling=[0.1, 0.1, 0.2, 0.2])

        def decode_fn(pred):
            return anchor_encoder_decoder.ext_decode_all_anchors(
                pred, all_anchors, all_num_anchors_depth, all_num_anchors_spatial)

        with tf.variable_scope(FLAGS.model_scope,
                               default_name=None,
                               values=[features],
                               reuse=tf.AUTO_REUSE):
            #backbone = ssd_net.VGG16Backbone(FLAGS.data_format)
            backbone = ssd_net.MobileNetV2Backbone(FLAGS.data_format)
            feature_layers = backbone.forward(features, training=False)
            location_pred, cls_pred = ssd_net.multibox_head(
                feature_layers,
                FLAGS.num_classes,
                all_num_anchors_depth,
                data_format=FLAGS.data_format)

            if FLAGS.data_format == 'channels_first':
                cls_pred = [
                    tf.transpose(pred, [0, 2, 3, 1]) for pred in cls_pred
                ]
                location_pred = [
                    tf.transpose(pred, [0, 2, 3, 1]) for pred in location_pred
                ]

            cls_pred = [
                tf.reshape(pred, [-1, FLAGS.num_classes]) for pred in cls_pred
            ]
            location_pred = [
                tf.reshape(pred, [-1, 4]) for pred in location_pred
            ]

            cls_pred = tf.concat(cls_pred, axis=0)
            location_pred = tf.concat(location_pred, axis=0)

        with tf.device('/cpu:0'):
            bboxes_pred = decode_fn(location_pred)
            bboxes_pred = tf.concat(bboxes_pred, axis=0)
            selected_bboxes, selected_scores = parse_by_class(
                cls_pred, bboxes_pred, FLAGS.num_classes,
                FLAGS.select_threshold, FLAGS.min_size, FLAGS.keep_topk,
                FLAGS.nms_topk, FLAGS.nms_threshold)

            labels_list = []
            scores_list = []
            bboxes_list = []
            for k, v in selected_scores.items():
                labels_list.append(tf.ones_like(v, tf.int32) * k)
                scores_list.append(v)
                bboxes_list.append(selected_bboxes[k])
            all_labels = tf.concat(labels_list, axis=0)
            all_scores = tf.concat(scores_list, axis=0)
            all_bboxes = tf.concat(bboxes_list, axis=0)

        saver = tf.train.Saver()
        with tf.Session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)

            saver.restore(sess, get_checkpoint())

            while video.isOpened():
                ret, frame = video.read()
                if not ret:
                    break
                else:
                    timer2 = cv2.getTickCount()
                    if undistort == 'y':
                        ######################################## Undistortion Parts ########################################
                        dim2 = None
                        dim3 = None

                        timer = cv2.getTickCount()

                        # dim1 is the (width, height) of the input image to undistort
                        dim1 = frame.shape[:2][::-1]
                        assert dim1[0] / dim1[1] == DIM[0] / DIM[1], \
                            "Image to undistort needs to have same aspect ratio as the ones used in calibration"
                        if not dim2:
                            dim2 = dim1
                        if not dim3:
                            dim3 = dim1
                        scaled_K = K * dim1[0] / DIM[0]  # K scales with the image dimension,
                        scaled_K[2][2] = 1.0  # except that K[2][2] is always 1.0
                        # This is how scaled_K, dim2 and balance are used to determine the final K used to un-distort image. OpenCV document failed to make this clear!

                        new_K = cv2.fisheye.estimateNewCameraMatrixForUndistortRectify(
                            scaled_K, D, dim2, np.eye(3), balance=0)
                        map1, map2 = cv2.fisheye.initUndistortRectifyMap(
                            scaled_K, D, np.eye(3), new_K, dim3, cv2.CV_16SC2)

                        frame_r = cv2.remap(frame,
                                            map1,
                                            map2,
                                            interpolation=cv2.INTER_LINEAR,
                                            borderMode=cv2.BORDER_CONSTANT)

                        t = (cv2.getTickCount() -
                             timer) / cv2.getTickFrequency()

                        # frame_r = cv2.resize(dst, (640, 360))
                        # frame_r = cv2.putText(frame_r, "Undistortion processing time: %.3f sec" % t, (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.5,(0, 255, 255), 2) #(1.0 / (end - start))
                        # ###############################################################################################

                    labels_, scores_, bboxes_ = sess.run(
                        [all_labels, all_scores, all_bboxes],
                        feed_dict={
                            image_input: frame,
                            shape_input: frame.shape[:-1]
                        })

                    img_to_draw = draw_toolbox.bboxes_draw_on_img(frame,
                                                                  labels_,
                                                                  scores_,
                                                                  bboxes_,
                                                                  thickness=2)
                    fps = cv2.getTickFrequency() / (cv2.getTickCount() -
                                                    timer2)
                    img_to_draw = cv2.putText(img_to_draw, "FPS : %.1f" % fps,
                                              (10, 20),
                                              cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                              (0, 255, 255), 2)  # dst_r
                    cv2.imshow('Object detector', img_to_draw)  # dst_r
                    if cv2.waitKey(1) == ord('q'):
                        break
Example #9
def ssd(path):
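    """Detect objects in the image at `path`: crop every raw decoded candidate box to ./res/img/, log coordinates and placeholder labels to ./res/cor.txt and ./res/label.txt, then rewrite the labels of boxes that survive per-class selection."""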
    with tf.Graph().as_default():
        out_shape = [FLAGS.train_image_size] * 2

        image_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
        shape_input = tf.placeholder(tf.int32, shape=(2,))

        features = ssd_preprocessing.preprocess_for_eval(image_input, out_shape, data_format=FLAGS.data_format, output_rgb=False)
        features = tf.expand_dims(features, axis=0)

        anchor_creator = anchor_manipulator.AnchorCreator(out_shape,
                                                    layers_shapes = [(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)],
                                                    anchor_scales = [(0.1,), (0.2,), (0.375,), (0.55,), (0.725,), (0.9,)],
                                                    extra_anchor_scales = [(0.1414,), (0.2739,), (0.4541,), (0.6315,), (0.8078,), (0.9836,)],
                                                    anchor_ratios = [(1., 2., .5), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., .5), (1., 2., .5)],
                                                    #anchor_ratios = [(2., .5), (2., 3., .5, 0.3333), (2., 3., .5, 0.3333), (2., 3., .5, 0.3333), (2., .5), (2., .5)],
                                                    layer_steps = [8, 16, 32, 64, 100, 300])
        all_anchors, all_num_anchors_depth, all_num_anchors_spatial = anchor_creator.get_all_anchors()

        anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(allowed_borders = [1.0] * 6,
                                                            positive_threshold = None,
                                                            ignore_threshold = None,
                                                            prior_scaling=[0.1, 0.1, 0.2, 0.2])

        def decode_fn(pred):
            return anchor_encoder_decoder.ext_decode_all_anchors(pred, all_anchors, all_num_anchors_depth, all_num_anchors_spatial)

        with tf.variable_scope(FLAGS.model_scope, default_name=None, values=[features], reuse=tf.AUTO_REUSE):
            backbone = ssd_net.VGG16Backbone(FLAGS.data_format)
            feature_layers = backbone.forward(features, training=False)
            location_pred, cls_pred = ssd_net.multibox_head(feature_layers, FLAGS.num_classes, all_num_anchors_depth, data_format=FLAGS.data_format)
            if FLAGS.data_format == 'channels_first':
                cls_pred = [tf.transpose(pred, [0, 2, 3, 1]) for pred in cls_pred]
                location_pred = [tf.transpose(pred, [0, 2, 3, 1]) for pred in location_pred]

            cls_pred = [tf.reshape(pred, [-1, FLAGS.num_classes]) for pred in cls_pred]
            location_pred = [tf.reshape(pred, [-1, 4]) for pred in location_pred]

            cls_pred = tf.concat(cls_pred, axis=0)
            location_pred = tf.concat(location_pred, axis=0)

        with tf.device('/cpu:0'):
            bboxes_pred = decode_fn(location_pred)
            bboxes_pred = tf.concat(bboxes_pred, axis=0)

            selected_bboxes, selected_scores = parse_by_class(cls_pred, bboxes_pred,
                                                            FLAGS.num_classes, FLAGS.select_threshold, FLAGS.min_size,
                                                            FLAGS.keep_topk, FLAGS.nms_topk, FLAGS.nms_threshold)

            labels_list = []
            scores_list = []
            bboxes_list = []
            for k, v in selected_scores.items():
                labels_list.append(tf.ones_like(v, tf.int32) * k)
                scores_list.append(v)
                bboxes_list.append(selected_bboxes[k])
            all_labels = tf.concat(labels_list, axis=0)
            all_scores = tf.concat(scores_list, axis=0)
            all_bboxes = tf.concat(bboxes_list, axis=0)

        saver = tf.train.Saver()
        with tf.Session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)
            saver.restore(sess, get_checkpoint())

            np_image = imread(path)
            im = Image.open(path)
            print(np_image.shape)

            labels_, scores_, bboxes_ = sess.run([all_labels, all_scores, all_bboxes], feed_dict = {image_input : np_image, shape_input : np_image.shape[:-1]})

            raw_bboxes_ = sess.run([bboxes_pred], feed_dict = {image_input : np_image, shape_input : np_image.shape[:-1]})

            shape = np_image.shape
            for j in range(len(raw_bboxes_[0])):
                all_box = raw_bboxes_[0][j]
                p1 = (int(all_box[0] * shape[0]), int(all_box[1] * shape[1]))
                p2 = (int(all_box[2] * shape[0]), int(all_box[3] * shape[1]))
                if (p2[0] - p1[0] < 1) or (p2[1] - p1[1] < 1):
                    continue
                x1 = p1[1]
                y1 = p1[0]
                x2 = p2[1]
                y2 = p2[0]

                obj = im.crop((x1, y1, x2, y2))

                num_str = str(j)
                num_str = num_str.zfill(5)
                obj.save('./res/img/{}.jpg'.format(num_str))

                cor = str(x1) + ',' + str(y1) + ',' + str(x2) + ',' + str(y2)
                with open('./res/cor.txt', 'a') as f2:
                    f2.write(cor + '\n')

                zero_str = str(0)
                with open('./res/label.txt', 'a') as f:
                    f.write(num_str + ',' + zero_str + '\n')

            num1 = 0
            for i in range(bboxes_.shape[0]):
                bbox = bboxes_[i]
                p1 = (int(bbox[0] * shape[0]), int(bbox[1] * shape[1]))
                p2 = (int(bbox[2] * shape[0]), int(bbox[3] * shape[1]))
                num1 = num1 + 1

                if (p2[0] - p1[0] < 1) or (p2[1] - p1[1] < 1):
                    continue
                x1 = p1[1]
                y1 = p1[0]
                x2 = p2[1]
                y2 = p2[0]

                cor1 = str(x1) + ',' + str(y1) + ',' + str(x2) + ',' + str(y2)

                num = 0
                with open('./res/cor.txt', 'r') as f11, open('./res/label.txt', 'r+') as f22:
                    for line in f11:
                        num = num + 1
                        if cor1 in line:
                            num11 = str(num)
                            print(num11 + '\n')

                            num11 = num11.zfill(5)
                            ber = num11 + ',' + str(0)
                            aft = num11 + ',' + str(labels_[i])

                            t = f22.read()
                            t = t.replace(ber, aft)
                            f22.seek(0, 0)
                            f22.write(t)
            print(num1)

            img_to_draw = draw_toolbox.bboxes_draw_on_img(np_image, labels_, scores_, bboxes_, thickness=2)
            imsave('./demo/out.jpg', img_to_draw)
Example #10
def main(_):
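    """Feed a manually preprocessed image (resized and BGR mean-subtracted with NumPy) straight into the backbone, bypassing the TF preprocessing ops, then re-export the checkpoint."""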
    with tf.Graph().as_default():
        out_shape = [FLAGS.train_image_size] * 2
        anchor_creator = anchor_manipulator.AnchorCreator(
            out_shape,
            layers_shapes=[(38, 38), (19, 19), (10, 10), (5, 5), (3, 3),
                           (1, 1)],
            anchor_scales=[(0.1, ), (0.2, ), (0.375, ), (0.55, ), (0.725, ),
                           (0.9, )],
            extra_anchor_scales=[(0.1414, ), (0.2739, ), (0.4541, ),
                                 (0.6315, ), (0.8078, ), (0.9836, )],
            anchor_ratios=[(1., 2., .5), (1., 2., 3., .5, 0.3333),
                           (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333),
                           (1., 2., .5), (1., 2., .5)],
            #anchor_ratios = [(2., .5), (2., 3., .5, 0.3333), (2., 3., .5, 0.3333),
            #(2., 3., .5, 0.3333), (2., .5), (2., .5)],
            layer_steps=[8, 16, 32, 64, 100, 300])
        all_anchors, all_num_anchors_depth, all_num_anchors_spatial = anchor_creator.get_all_anchors()

        anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(
            allowed_borders=[1.0] * 6,
            positive_threshold=None,
            ignore_threshold=None,
            prior_scaling=[0.1, 0.1, 0.2, 0.2])

        def decode_fn(pred):
            return anchor_encoder_decoder.ext_decode_all_anchors(
                pred, all_anchors, all_num_anchors_depth,
                all_num_anchors_spatial)

        with tf.name_scope('define_input'):
            image_input = tf.placeholder(tf.float32,
                                         shape=(1, 300, 300, 3),
                                         name='image_input')
        print('image_input', image_input)
        with tf.variable_scope(FLAGS.model_scope,
                               default_name=None,
                               values=[image_input],
                               reuse=tf.AUTO_REUSE):
            backbone = ssd_net.VGG16Backbone(FLAGS.data_format)
            feature_layers = backbone.forward(image_input, training=False)
            location_pred, cls_pred = ssd_net.multibox_head(
                feature_layers,
                FLAGS.num_classes,
                all_num_anchors_depth,
                data_format=FLAGS.data_format)
            if FLAGS.data_format == 'channels_first':
                cls_pred = [
                    tf.transpose(pred, [0, 2, 3, 1]) for pred in cls_pred
                ]
                location_pred = [
                    tf.transpose(pred, [0, 2, 3, 1]) for pred in location_pred
                ]

            cls_pred = [
                tf.reshape(pred, [-1, FLAGS.num_classes]) for pred in cls_pred
            ]
            location_pred = [
                tf.reshape(pred, [-1, 4]) for pred in location_pred
            ]

            with tf.variable_scope('cls_pred'):
                cls_pred = tf.concat(cls_pred, axis=0)
            with tf.variable_scope('location_pred'):
                location_pred = tf.concat(location_pred, axis=0)

        with tf.device('/cpu:0'):
            bboxes_pred = decode_fn(location_pred)
            bboxes_pred = tf.concat(bboxes_pred, axis=0)
            selected_bboxes, selected_scores = parse_by_class(
                cls_pred, bboxes_pred, FLAGS.num_classes,
                FLAGS.select_threshold, FLAGS.min_size, FLAGS.keep_topk,
                FLAGS.nms_topk, FLAGS.nms_threshold)

            labels_list = []
            scores_list = []
            bboxes_list = []
            for k, v in selected_scores.items():
                labels_list.append(tf.ones_like(v, tf.int32) * k)
                scores_list.append(v)
                bboxes_list.append(selected_bboxes[k])
            all_labels = tf.concat(labels_list, axis=0)
            all_scores = tf.concat(scores_list, axis=0)
            all_bboxes = tf.concat(bboxes_list, axis=0)

        saver = tf.train.Saver()
        '''
        config = tf.ConfigProto(allow_soft_placement=True, inter_op_parallelism_threads=1, intra_op_parallelism_threads=1)
        config.mlu_options.data_parallelism = 1
        config.mlu_options.model_parallelism = 1
        config.mlu_options.core_num = 1
        config.mlu_options.core_version = 'MLU270'
        config.mlu_options.precision = 'float'
        with tf.Session(config = config) as sess:
        '''
        with tf.Session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)

            saver.restore(sess, get_checkpoint())

            _R_MEAN = 123.68
            _G_MEAN = 116.78
            _B_MEAN = 103.94
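            # cv2.imread returns BGR, so the channel means are listed in B, G, R order.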
            means = [
                _B_MEAN,
                _G_MEAN,
                _R_MEAN,
            ]
            np_image = cv2.imread('demo/test.jpg')
            image = cv2.resize(
                np_image, (FLAGS.train_image_size, FLAGS.train_image_size))
            image = (image - means)  # / 255.0
            image = np.expand_dims(image, axis=0)
            print('image', type(image), image.shape)
            '''
            image = tf.to_float(np_image)
            image = tf.image.resize_images(image, out_shape,
                                           method=tf.image.ResizeMethod.BILINEAR, align_corners=False)
            image.set_shape(out_shape + [3])
            num_channels = image.get_shape().as_list()[-1]
            channels = tf.split(axis=2, num_or_size_splits=num_channels, value=image)
            for i in range(num_channels):
                channels[i] -= means[i]
            image = tf.concat(axis=2, values=channels)
            image_channels = tf.unstack(image, axis=-1, name='split_rgb')
            image = tf.stack([image_channels[2], image_channels[1], image_channels[0]], axis=-1, name='merge_bgr')
            '''

            labels_, scores_, bboxes_ = sess.run(
                [all_labels, all_scores, all_bboxes],
                feed_dict={image_input: image})

            img_to_draw = draw_toolbox.bboxes_draw_on_img(np_image,
                                                          labels_,
                                                          scores_,
                                                          bboxes_,
                                                          thickness=2)
            cv2.imwrite('demo/test_out.jpg', img_to_draw)
            saver.save(sess,
                       'model/ssd300_vgg16/ssd300_vgg16_short',
                       global_step=0)
Example #11
        labels_list = []
        scores_list = []
        bboxes_list = []
        for k, v in selected_scores.items():
            labels_list.append(tf.ones_like(v, tf.int32) * k)
            scores_list.append(v)
            bboxes_list.append(selected_bboxes[k])
        all_labels = tf.concat(labels_list, axis=0)
        all_scores = tf.concat(scores_list, axis=0)
        all_bboxes = tf.concat(bboxes_list, axis=0)

print('sess2 start')
with tf.Session(graph=g2) as sess2:
    print('sess2 end')
    labels_, scores_, bboxes_ = sess2.run([all_labels, all_scores, all_bboxes],
                                          feed_dict={
                                              g2_cls_pred: cls_pred_,
                                              g2_location_pred: location_pred_
                                          })

    img_to_draw = draw_toolbox.bboxes_draw_on_img(np_image,
                                                  labels_,
                                                  scores_,
                                                  bboxes_,
                                                  thickness=2)
    imsave('demo/test_out.jpg', img_to_draw)
Example #12
def main(_):
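    """Like Example #6, but derives the anchors from the feature-map shapes at graph-construction time and rescales the predicted boxes back to the original image resolution."""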
    with tf.Graph().as_default():
        out_shape = [FLAGS.train_image_size] * 2

        image_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
        shape_input = tf.placeholder(tf.int32, shape=(2, ))

        features, output_shape = ssd_preprocessing.preprocess_for_eval(
            image_input,
            out_shape,
            data_format=FLAGS.data_format,
            output_rgb=False)
        features = tf.expand_dims(features, axis=0)
        output_shape = tf.expand_dims(output_shape, axis=0)

        all_anchor_scales = [(30., ), (60., ), (112.5, ), (165., ), (217.5, ),
                             (270., )]
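        # Scales here are absolute pixel sizes on the 300x300 input (30/300 = 0.1 ... 270/300 = 0.9),
        # equivalent to the relative anchor_scales used in the earlier examples.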
        all_extra_scales = [(42.43, ), (82.17, ), (136.23, ), (189.45, ),
                            (242.34, ), (295.08, )]
        all_anchor_ratios = [(1., 2., .5), (1., 2., 3., .5, 0.3333),
                             (1., 2., 3., .5, 0.3333),
                             (1., 2., 3., .5, 0.3333), (1., 2., .5),
                             (1., 2., .5)]
        # all_anchor_ratios = [(2., .5), (2., 3., .5, 0.3333), (2., 3., .5, 0.3333), (2., 3., .5, 0.3333), (2., .5), (2., .5)]

        with tf.variable_scope(FLAGS.model_scope,
                               default_name=None,
                               values=[features],
                               reuse=tf.AUTO_REUSE):
            backbone = ssd_net.VGG16Backbone(FLAGS.data_format)
            feature_layers = backbone.forward(features, training=False)
            with tf.device('/cpu:0'):
                anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(
                    positive_threshold=None,
                    ignore_threshold=None,
                    prior_scaling=[0.1, 0.1, 0.2, 0.2])

                if FLAGS.data_format == 'channels_first':
                    all_layer_shapes = [
                        tf.shape(feat)[2:] for feat in feature_layers
                    ]
                else:
                    all_layer_shapes = [
                        tf.shape(feat)[1:3] for feat in feature_layers
                    ]
                all_layer_strides = [8, 16, 32, 64, 100, 300]
                total_layers = len(all_layer_shapes)
                anchors_height = list()
                anchors_width = list()
                anchors_depth = list()
                for ind in range(total_layers):
                    _anchors_height, _anchors_width, _anchor_depth = anchor_encoder_decoder.get_anchors_width_height(
                        all_anchor_scales[ind],
                        all_extra_scales[ind],
                        all_anchor_ratios[ind],
                        name='get_anchors_width_height{}'.format(ind))
                    anchors_height.append(_anchors_height)
                    anchors_width.append(_anchors_width)
                    anchors_depth.append(_anchor_depth)
                anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax, _ = anchor_encoder_decoder.get_all_anchors(
                    tf.squeeze(output_shape, axis=0), anchors_height,
                    anchors_width, anchors_depth, [0.5] * total_layers,
                    all_layer_shapes, all_layer_strides, [0.] * total_layers,
                    [False] * total_layers)
            location_pred, cls_pred = ssd_net.multibox_head(
                feature_layers,
                FLAGS.num_classes,
                anchors_depth,
                data_format=FLAGS.data_format)
            if FLAGS.data_format == 'channels_first':
                cls_pred = [
                    tf.transpose(pred, [0, 2, 3, 1]) for pred in cls_pred
                ]
                location_pred = [
                    tf.transpose(pred, [0, 2, 3, 1]) for pred in location_pred
                ]

            cls_pred = [
                tf.reshape(pred, [-1, FLAGS.num_classes]) for pred in cls_pred
            ]
            location_pred = [
                tf.reshape(pred, [-1, 4]) for pred in location_pred
            ]

            cls_pred = tf.concat(cls_pred, axis=0)
            location_pred = tf.concat(location_pred, axis=0)

        with tf.device('/cpu:0'):
            bboxes_pred = anchor_encoder_decoder.decode_anchors(
                location_pred, anchors_ymin, anchors_xmin, anchors_ymax,
                anchors_xmax)
            selected_bboxes, selected_scores = bbox_util.parse_by_class(
                tf.squeeze(output_shape, axis=0), cls_pred, bboxes_pred,
                FLAGS.num_classes, FLAGS.select_threshold, FLAGS.min_size,
                FLAGS.keep_topk, FLAGS.nms_topk, FLAGS.nms_threshold)

            labels_list = []
            scores_list = []
            bboxes_list = []
            for k, v in selected_scores.items():
                labels_list.append(tf.ones_like(v, tf.int32) * k)
                scores_list.append(v)
                bboxes_list.append(selected_bboxes[k])
            all_labels = tf.concat(labels_list, axis=0)
            all_scores = tf.concat(scores_list, axis=0)
            all_bboxes = tf.concat(bboxes_list, axis=0)

        saver = tf.train.Saver()
        with tf.Session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)

            saver.restore(sess, get_checkpoint())

            np_image = imread('./demo/test.jpg')
            labels_, scores_, bboxes_, output_shape_ = sess.run(
                [all_labels, all_scores, all_bboxes, output_shape],
                feed_dict={
                    image_input: np_image,
                    shape_input: np_image.shape[:-1]
                })
            bboxes_[:, 0] = bboxes_[:, 0] * np_image.shape[0] / output_shape_[0, 0]
            bboxes_[:, 1] = bboxes_[:, 1] * np_image.shape[1] / output_shape_[0, 1]
            bboxes_[:, 2] = bboxes_[:, 2] * np_image.shape[0] / output_shape_[0, 0]
            bboxes_[:, 3] = bboxes_[:, 3] * np_image.shape[1] / output_shape_[0, 1]

            img_to_draw = draw_toolbox.bboxes_draw_on_img(np_image,
                                                          labels_,
                                                          scores_,
                                                          bboxes_,
                                                          thickness=2)
            imsave('./demo/test_out.jpg', img_to_draw)
Example #13
def main(_):
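    """WIDER FACE evaluation: for each image, merge detections from the original, flipped, and multi-scale passes with bounding-box voting and write per-image .txt results."""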
    with tf.Graph().as_default():
        target_shape = None

        image_input = tf.placeholder(tf.uint8, shape=(None, None, 3))

        features, output_shape = sfd_preprocessing.preprocess_for_eval(image_input, target_shape, data_format=FLAGS.data_format, output_rgb=False)
        features = tf.expand_dims(features, axis=0)
        output_shape = tf.expand_dims(output_shape, axis=0)

        all_anchor_scales = [(16.,), (32.,), (64.,), (128.,), (256.,), (512.,)]
        all_extra_scales = [(), (), (), (), (), ()]
        all_anchor_ratios = [(1.,), (1.,), (1.,), (1.,), (1.,), (1.,)]
        all_layer_strides = [4, 8, 16, 32, 64, 128]
        offset_list = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
        with tf.variable_scope(FLAGS.model_scope, default_name=None, values=[features], reuse=tf.AUTO_REUSE):
            backbone = sfd_net.VGG16Backbone(FLAGS.data_format)
            feature_layers = backbone.get_featmaps(features, training=False)
            with tf.device('/cpu:0'):
                anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(positive_threshold=None, ignore_threshold=None, prior_scaling=[0.1, 0.1, 0.2, 0.2])

                if FLAGS.data_format == 'channels_first':
                    all_layer_shapes = [tf.shape(feat)[2:] for feat in feature_layers]
                else:
                    all_layer_shapes = [tf.shape(feat)[1:3] for feat in feature_layers]
                total_layers = len(all_layer_shapes)
                anchors_height = list()
                anchors_width = list()
                anchors_depth = list()
                for ind in range(total_layers):
                    _anchors_height, _anchors_width, _anchor_depth = anchor_encoder_decoder.get_anchors_width_height(all_anchor_scales[ind], all_extra_scales[ind], all_anchor_ratios[ind], name='get_anchors_width_height{}'.format(ind))
                    anchors_height.append(_anchors_height)
                    anchors_width.append(_anchors_width)
                    anchors_depth.append(_anchor_depth)
                anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax, _ = anchor_encoder_decoder.get_all_anchors(tf.squeeze(output_shape, axis=0),
                                                                                anchors_height, anchors_width, anchors_depth,
                                                                                offset_list, all_layer_shapes, all_layer_strides,
                                                                                [0.] * total_layers, [False] * total_layers)
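            # The [3] + [1] * ... argument presumably enables an S3FD-style max-out background
            # score on the first (stride-4) layer; this reading of multibox_head's API is an assumption.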
            location_pred, cls_pred = backbone.multibox_head(feature_layers, [1] * len(feature_layers),
                                        [3] + [1] * (len(feature_layers) - 1), anchors_depth)
            if FLAGS.data_format == 'channels_first':
                cls_pred = [tf.transpose(pred, [0, 2, 3, 1]) for pred in cls_pred]
                location_pred = [tf.transpose(pred, [0, 2, 3, 1]) for pred in location_pred]

            cls_pred = [tf.reshape(pred, [-1, FLAGS.num_classes]) for pred in cls_pred]
            location_pred = [tf.reshape(pred, [-1, 4]) for pred in location_pred]

            cls_pred = tf.nn.softmax(tf.concat(cls_pred, axis=0))[:, -1]
            location_pred = tf.concat(location_pred, axis=0)

        with tf.device('/cpu:0'):
            bboxes_pred = anchor_encoder_decoder.decode_anchors(location_pred, anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax)

        saver = tf.train.Saver()
        with tf.Session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)

            saver.restore(sess, get_checkpoint())

            os.makedirs(FLAGS.det_dir, exist_ok=True)

            if FLAGS.subset == 'val':
                wider_face = sio.loadmat(os.path.join(FLAGS.data_dir, 'wider_face_split', 'wider_face_val.mat'))    # Val set
            else:
                wider_face = sio.loadmat(os.path.join(FLAGS.data_dir, 'wider_face_split', 'wider_face_test.mat'))     # Test set
            event_list = wider_face['event_list']
            file_list = wider_face['file_list']
            del wider_face

            Path = os.path.join(FLAGS.data_dir, ('WIDER_val' if FLAGS.subset == 'val' else 'WIDER_test'), 'images')
            save_path = os.path.join(FLAGS.det_dir, FLAGS.subset)
            len_event = len(event_list)
            for index, event in enumerate(event_list):
                filelist = file_list[index][0]
                len_files = len(filelist)
                if not os.path.exists(os.path.join(save_path, event[0][0])):
                    os.makedirs(os.path.join(save_path, event[0][0]))

                for num, file in enumerate(filelist):
                    im_name = file[0][0]
                    Image_Path = os.path.join(Path, event[0][0], im_name[:]+'.jpg')

                    image = imread(Image_Path)

                    max_im_shrink = (0x7fffffff / FLAGS.memory_limit / (image.shape[0] * image.shape[1])) ** 0.5 # the max size of input image for caffe
                    shrink = max_im_shrink if max_im_shrink < 1 else 1

                    det0 = detect_face([sess, image_input, bboxes_pred, cls_pred], image, shrink)  # origin test
                    det1 = flip_test([sess, image_input, bboxes_pred, cls_pred], image, shrink)    # flip test
                    [det2, det3] = multi_scale_test([sess, image_input, bboxes_pred, cls_pred], image, max_im_shrink)  #multi-scale test
                    # merge all test results via bounding box voting
                    det = np.row_stack((det0, det1, det2, det3))
                    dets = bbox_vote(det)

                    f = open(os.path.join(save_path, event[0][0], im_name+'.txt'), 'w')
                    write_to_txt(f, dets, event, im_name)
                    f.close()
                    if num % FLAGS.log_every_n_steps == 0:
                        img_to_draw = draw_toolbox.bboxes_draw_on_img(image, (dets[:, 4] > 0.2).astype(np.int32), dets[:, 4], dets[:, :4], thickness=2)
                        imsave(os.path.join(FLAGS.debug_dir, '{}.jpg'.format(im_name)), img_to_draw)

                    sys.stdout.write('\r>> Predicting event:%d/%d num:%d/%d' % (index + 1, len_event, num + 1, len_files))
                    sys.stdout.flush()
                sys.stdout.write('\n')
                sys.stdout.flush()
Example #14
def main(_):
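    """Per-frame video inference: convert each BGR frame to RGB, run detection, and overlay the per-frame latency in milliseconds."""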
    with tf.Graph().as_default():
        out_shape = [FLAGS.train_image_size] * 2

        image_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
        shape_input = tf.placeholder(tf.int32, shape=(2, ))

        features = ssd_preprocessing.preprocess_for_eval(
            image_input,
            out_shape,
            data_format=FLAGS.data_format,
            output_rgb=False)
        features = tf.expand_dims(features, axis=0)

        anchor_creator = anchor_manipulator.AnchorCreator(
            out_shape,
            layers_shapes=[(38, 38), (19, 19), (10, 10), (5, 5), (3, 3),
                           (1, 1)],
            anchor_scales=[(0.1, ), (0.2, ), (0.375, ), (0.55, ), (0.725, ),
                           (0.9, )],
            extra_anchor_scales=[(0.1414, ), (0.2739, ), (0.4541, ),
                                 (0.6315, ), (0.8078, ), (0.9836, )],
            anchor_ratios=[(1., 2., .5), (1., 2., 3., .5, 0.3333),
                           (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333),
                           (1., 2., .5), (1., 2., .5)],
            #anchor_ratios = [(2., .5), (2., 3., .5, 0.3333), (2., 3., .5, 0.3333), (2., 3., .5, 0.3333), (2., .5), (2., .5)],
            layer_steps=[8, 16, 32, 64, 100, 300])
        all_anchors, all_num_anchors_depth, all_num_anchors_spatial = anchor_creator.get_all_anchors()

        anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(
            allowed_borders=[1.0] * 6,
            positive_threshold=None,
            ignore_threshold=None,
            prior_scaling=[0.1, 0.1, 0.2, 0.2])

        def decode_fn(pred):
            return anchor_encoder_decoder.ext_decode_all_anchors(
                pred, all_anchors, all_num_anchors_depth, all_num_anchors_spatial)

        with tf.variable_scope(FLAGS.model_scope,
                               default_name=None,
                               values=[features],
                               reuse=tf.AUTO_REUSE):
            backbone = ssd_net.VGG16Backbone(FLAGS.data_format)
            feature_layers = backbone.forward(features, training=False)
            location_pred, cls_pred = ssd_net.multibox_head(
                feature_layers,
                FLAGS.num_classes,
                all_num_anchors_depth,
                data_format=FLAGS.data_format)
            if FLAGS.data_format == 'channels_first':
                cls_pred = [
                    tf.transpose(pred, [0, 2, 3, 1]) for pred in cls_pred
                ]
                location_pred = [
                    tf.transpose(pred, [0, 2, 3, 1]) for pred in location_pred
                ]

            cls_pred = [
                tf.reshape(pred, [-1, FLAGS.num_classes]) for pred in cls_pred
            ]
            location_pred = [
                tf.reshape(pred, [-1, 4]) for pred in location_pred
            ]

            cls_pred = tf.concat(cls_pred, axis=0)
            location_pred = tf.concat(location_pred, axis=0)

        with tf.device('/cpu:0'):
            bboxes_pred = decode_fn(location_pred)
            bboxes_pred = tf.concat(bboxes_pred, axis=0)
            selected_bboxes, selected_scores = parse_by_class(
                cls_pred, bboxes_pred, FLAGS.num_classes,
                FLAGS.select_threshold, FLAGS.min_size, FLAGS.keep_topk,
                FLAGS.nms_topk, FLAGS.nms_threshold)

            labels_list = []
            scores_list = []
            bboxes_list = []
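            # The loop below flattens the per-class {class_id: tensor} dicts
            # into three aligned tensors, so one sess.run fetch returns every
            # detection with its label, score and box.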
            for k, v in selected_scores.items():
                labels_list.append(tf.ones_like(v, tf.int32) * k)
                scores_list.append(v)
                bboxes_list.append(selected_bboxes[k])
            all_labels = tf.concat(labels_list, axis=0)
            all_scores = tf.concat(scores_list, axis=0)
            all_bboxes = tf.concat(bboxes_list, axis=0)

        saver = tf.train.Saver()
        with tf.Session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)

            saver.restore(sess, get_checkpoint())

            # `vid`, `video_frame_cnt` and `videoWriter` are assumed to have been
            # created earlier in the script with cv2.VideoCapture / cv2.VideoWriter.
            for i in range(video_frame_cnt):
                ret, img_ori = vid.read()
                if not ret:  # stop cleanly if the stream ends before video_frame_cnt
                    break

                # height_ori, width_ori = img_ori.shape[:2]
                # img = cv2.resize(img_ori, tuple(args.new_size))
                img = cv2.cvtColor(img_ori, cv2.COLOR_BGR2RGB)
                np_image = np.asarray(img, np.float32)

                start_time = time.time()
                labels_, scores_, bboxes_ = sess.run(
                    [all_labels, all_scores, all_bboxes],
                    feed_dict={
                        image_input: np_image,
                        shape_input: np_image.shape[:-1]
                    })
                end_time = time.time()

                img_to_draw = draw_toolbox.bboxes_draw_on_img(np_image,
                                                              labels_,
                                                              scores_,
                                                              bboxes_,
                                                              thickness=2)
                cv2.putText(img_to_draw,
                            '{:.2f}ms'.format((end_time - start_time) * 1000),
                            (40, 40),
                            0,  # fontFace 0 == cv2.FONT_HERSHEY_SIMPLEX
                            fontScale=1,
                            color=(0, 255, 0),
                            thickness=2)

                # Round-trip through disk to obtain a BGR uint8 frame for
                # OpenCV (an in-memory alternative is sketched after this example).
                imsave('./test_out.jpg', img_to_draw)

                new_img = cv2.imread('./test_out.jpg')
                cv2.imshow('image', new_img)

                videoWriter.write(new_img)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

            vid.release()
            videoWriter.release()
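
The save-then-reload of ./test_out.jpg above is a roundabout way to hand cv2.imshow a BGR uint8 frame. A sketch of the same step done in memory, assuming img_to_draw is an RGB array with values in the 0-255 range:

# Convert the drawn RGB frame to BGR uint8 directly, skipping the disk I/O.
new_img = cv2.cvtColor(img_to_draw.astype(np.uint8), cv2.COLOR_RGB2BGR)
cv2.imshow('image', new_img)
videoWriter.write(new_img)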
Example #15
def main(_):
    with tf.Graph().as_default():
        image_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
        shape_input = tf.placeholder(tf.int32, shape=(2, ))

        features = common_preprocessing.light_head_preprocess_for_test(
            image_input, [FLAGS.train_image_size] * 2,
            data_format=('NCHW'
                         if FLAGS.data_format == 'channels_first' else 'NHWC'))

        features = tf.expand_dims(features, axis=0)

        anchor_creator = anchor_manipulator.AnchorCreator(
            [FLAGS.train_image_size] * 2,
            layers_shapes=[(30, 30)],
            anchor_scales=[[0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]],
            extra_anchor_scales=[[0.1]],
            anchor_ratios=[[1., 2., .5]],
            layer_steps=[16])

        all_anchors, num_anchors_list = anchor_creator.get_all_anchors()

        anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(
            all_anchors,
            num_classes=FLAGS.num_classes,
            allowed_borders=None,
            positive_threshold=None,
            ignore_threshold=None,
            prior_scaling=[1., 1., 1., 1.])

        with tf.variable_scope(FLAGS.model_scope,
                               default_name=None,
                               values=[features],
                               reuse=tf.AUTO_REUSE):
            rpn_feat_map, backbone_feat = xception_body.XceptionBody(
                features,
                FLAGS.num_classes,
                is_training=False,
                data_format=FLAGS.data_format)
            #rpn_feat_map = tf.Print(rpn_feat_map,[tf.shape(rpn_feat_map), rpn_feat_map,backbone_feat])
            rpn_cls_score, rpn_bbox_pred = xception_body.get_rpn(
                rpn_feat_map, num_anchors_list[0], False, FLAGS.data_format,
                'rpn_head')

            large_sep_feature = xception_body.large_sep_kernel(
                backbone_feat, 256, 10 * 7 * 7, False, FLAGS.data_format,
                'large_sep_feature')

            if FLAGS.data_format == 'channels_first':
                rpn_cls_score = tf.transpose(rpn_cls_score, [0, 2, 3, 1])
                rpn_bbox_pred = tf.transpose(rpn_bbox_pred, [0, 2, 3, 1])

            rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
            rpn_object_score = tf.nn.softmax(rpn_cls_score)[:, -1]
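            # Column -1 of the two-way softmax is the foreground (objectness)
            # probability of each anchor.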

            rpn_object_score = tf.reshape(rpn_object_score, [1, -1])
            rpn_location_pred = tf.reshape(rpn_bbox_pred, [1, -1, 4])

            rpn_bboxes_pred = anchor_encoder_decoder.decode_all_anchors(
                [rpn_location_pred], squeeze_inner=True)[0]

            proposals_bboxes = xception_body.get_proposals(
                rpn_object_score, rpn_bboxes_pred, None,
                FLAGS.rpn_pre_nms_top_n, FLAGS.rpn_post_nms_top_n,
                FLAGS.rpn_nms_thres, FLAGS.rpn_min_size, False,
                FLAGS.data_format)

            # pool_method is assumed to be defined elsewhere in this script.
            cls_score, bboxes_reg = xception_body.get_head(
                large_sep_feature,
                lambda input_, bboxes_, grid_width_, grid_height_:
                    ps_roi_align(input_, bboxes_, grid_width_, grid_height_,
                                 pool_method),
                7, 7, None, proposals_bboxes, FLAGS.num_classes, False, False,
                0, FLAGS.data_format, 'final_head')

            head_bboxes_pred = anchor_encoder_decoder.ext_decode_rois(
                proposals_bboxes,
                bboxes_reg,
                head_prior_scaling=[1., 1., 1., 1.])

            head_cls_score = tf.reshape(cls_score, [-1, FLAGS.num_classes])
            head_cls_score = tf.nn.softmax(head_cls_score)
            head_bboxes_pred = tf.reshape(head_bboxes_pred, [-1, 4])

            with tf.device('/device:CPU:0'):
                selected_scores, selected_bboxes = eval_helper.tf_bboxes_select(
                    [head_cls_score], [head_bboxes_pred],
                    FLAGS.select_threshold,
                    FLAGS.num_classes,
                    scope='xdet_v2_select')

                selected_bboxes = eval_helper.bboxes_clip(
                    tf.constant([0., 0., 1., 1.]), selected_bboxes)
                selected_scores, selected_bboxes = eval_helper.filter_boxes(
                    selected_scores,
                    selected_bboxes,
                    0.03,
                    shape_input, [FLAGS.train_image_size] * 2,
                    keep_top_k=FLAGS.nms_topk * 2)

                # Resize bboxes to original image shape.
                selected_bboxes = eval_helper.bboxes_resize(
                    tf.constant([0., 0., 1., 1.]), selected_bboxes)

                selected_scores, selected_bboxes = eval_helper.bboxes_sort(
                    selected_scores, selected_bboxes, top_k=FLAGS.nms_topk * 2)

                # Apply NMS algorithm.
                selected_scores, selected_bboxes = eval_helper.bboxes_nms_batch(
                    selected_scores,
                    selected_bboxes,
                    nms_threshold=FLAGS.nms_threshold,
                    keep_top_k=FLAGS.nms_topk)

                labels_list = []
                for k, v in selected_scores.items():
                    labels_list.append(tf.ones_like(v, tf.int32) * k)
                all_labels = tf.concat(labels_list, axis=0)
                all_scores = tf.concat(list(selected_scores.values()), axis=0)
                all_bboxes = tf.concat(list(selected_bboxes.values()), axis=0)

        saver = tf.train.Saver()
        with tf.Session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)

            saver.restore(sess, FLAGS.checkpoint_path)

            np_image = imread('./demo/test.jpg')
            labels_, scores_, bboxes_ = sess.run(
                [all_labels, all_scores, all_bboxes],
                feed_dict={
                    image_input: np_image,
                    shape_input: np_image.shape[:-1]
                })

            img_to_draw = draw_toolbox.bboxes_draw_on_img(np_image,
                                                          labels_,
                                                          scores_,
                                                          bboxes_,
                                                          thickness=2)
            imsave(os.path.join(FLAGS.debug_dir, 'test_out.jpg'), img_to_draw)
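
The ext_decode_rois call above turns the head's bbox regression output into absolute boxes relative to the RPN proposals. Below is a minimal NumPy sketch of the standard center/size delta decoding such a step performs, assuming unit scaling as in head_prior_scaling=[1., 1., 1., 1.] above; the function name and the [ymin, xmin, ymax, xmax] box order are illustrative assumptions, not taken from this repo:

import numpy as np

def decode_rois_sketch(proposals, deltas):
    # proposals: (N, 4) boxes as [ymin, xmin, ymax, xmax];
    # deltas: (N, 4) regression outputs as (dy, dx, dh, dw).
    h = proposals[:, 2] - proposals[:, 0]
    w = proposals[:, 3] - proposals[:, 1]
    cy = proposals[:, 0] + 0.5 * h
    cx = proposals[:, 1] + 0.5 * w

    # Shift the center by a fraction of the proposal size and rescale the
    # size through an exponential, which keeps it positive.
    new_cy = cy + deltas[:, 0] * h
    new_cx = cx + deltas[:, 1] * w
    new_h = h * np.exp(deltas[:, 2])
    new_w = w * np.exp(deltas[:, 3])

    return np.stack([new_cy - 0.5 * new_h,
                     new_cx - 0.5 * new_w,
                     new_cy + 0.5 * new_h,
                     new_cx + 0.5 * new_w], axis=1)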