def __init__(self,
                 model_dir,
                 in_dir,
                 out_dir,
                 nms_th_for_all_scale=0.5,
                 score_th=0.2,
                 scales=([384, 384], [768, 384], [768, 768]),
                 min_side_scale=384,
                 save_res_path='eval_res.txt'
                 ):
        """Set up evaluation options and load the text-detection network.

        Builds a dedicated TensorFlow graph and session, wires the
        evaluation pre-processing to a ``TextboxNet`` and restores the
        weights from a checkpoint.

        Args:
            model_dir: Either a directory, in which case its latest
                checkpoint is restored, or a checkpoint path that is
                restored as given.
            in_dir: Input path; it must already exist.
            out_dir: Output location, stored as ``self.out_dir``.
            nms_th_for_all_scale: Stored as ``self.nms_th_for_all_scale``.
            score_th: Stored as ``self.score_th``.
            scales: Stored as ``self.text_scales``; each entry is a pair of
                ints. The default is never mutated here.
            min_side_scale: Stored as ``self.min_side_scale``;
                ``self.max_side_scale`` is set to twice this value.
            save_res_path: Stored as ``self.save_eval_resut_path``.

        Raises:
            ValueError: If ``in_dir`` does not exist, or if ``model_dir`` is
                a directory that contains no checkpoint.
        """
        if os.path.exists(in_dir):
            self.in_dir = in_dir
        else:
            raise ValueError('{} does not existed!!!'.format(in_dir))

        self.out_dir = out_dir
        self.suffixes = ['.png', '.PNG', '.jpg', '.jpeg']

        # NOTE(review): get_img_path() is defined outside this view; it
        # presumably relies on in_dir/suffixes being set first - confirm.
        self.img_path, self.img_num = self.get_img_path()

        self.nms_th_for_all_scale = nms_th_for_all_scale
        self.nms_threshold = 0.45
        self.score_th = score_th
        print('self.score_th', self.score_th)
        self.make_out_dir()
        self.text_scales = scales
        self.data_format = 'NHWC'
        self.select_threshold = 0.01
        self.min_side_scale = min_side_scale
        self.max_side_scale = self.min_side_scale * 2  # 384 * 2
        self.save_xml_flag = True
        self.save_txt_flag = True
        self.dynamic_scale_flag = False
        self.allow_padding = False
        self.allow_post_processing = False
        self.allow_eval_flag = False
        self.resize_flag = False
        self.save_eval_resut_path = save_res_path
        self.model_path = None

        self.config = tf.ConfigProto(allow_soft_placement=True)
        self.config.gpu_options.allow_growth = True

        # A private graph/session pair keeps this model separate from the
        # process-wide default graph.
        self.graph = tf.Graph()
        self.session_text = tf.Session(graph=self.graph, config=self.config)

        with self.session_text.as_default():
            with self.graph.as_default():
                self.img_text = tf.placeholder(
                    tf.float32, shape=(None, None, 3))
                print(len(self.text_scales))
                # Explicit one-element tuple: a vector of two int32 values.
                self.scale_text = tf.placeholder(tf.int32, shape=(2,))

                img_pre_text, label_pre_text, bboxes_pre_text, self.bboxes_img_text, xs_text, ys_text = ssd_vgg_preprocessing.preprocess_for_eval(
                    self.img_text,
                    None,
                    None,
                    None,
                    None,
                    self.scale_text,
                    self.data_format,
                    resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
                # Add a leading axis of size 1 before feeding the network.
                image_text_4d = tf.expand_dims(img_pre_text, 0)
                image_text_4d = tf.cast(image_text_4d, tf.float32)
                self.image_text_4d = image_text_4d
                self.net_text = txtbox_384.TextboxNet()
                with slim.arg_scope(
                        self.net_text.arg_scope(data_format=self.data_format)):
                    self.predictions_text, self.localisations_text, self.logits_text, self.endpoints_text, self.l_shape = self.net_text.net(
                        self.image_text_4d,
                        is_training=False,
                        reuse=tf.AUTO_REUSE,
                        update_feat_shapes=True)
                saver_text = tf.train.Saver()
                if os.path.isdir(model_dir):
                    ckpt_path = tf.train.latest_checkpoint(model_dir)
                    if ckpt_path is None:
                        # latest_checkpoint() returns None when the
                        # directory holds no checkpoint; fail with a clear
                        # message instead of a TypeError further down.
                        raise ValueError(
                            'No checkpoint found in {}'.format(model_dir))
                else:
                    ckpt_path = model_dir
                # latest_checkpoint() already returns a path that includes
                # model_dir, so joining it with model_dir again would
                # duplicate the prefix for relative directories.
                self.model_path = ckpt_path
                print(model_dir)
                saver_text.restore(self.session_text, ckpt_path)

        logging.info("Textbox++ model initialized.")
# Example #2
0
# Session setup: device-placement logging off, GPU options supplied by the
# surrounding code.
config = tf.compat.v1.ConfigProto(
    gpu_options=gpu_options,
    log_device_placement=False,
)
isess = tf.compat.v1.Session(config=config)

# Tensor layout and network input size.
data_format = 'NHWC'
net_shape = (384, 384)
# Alternative input size: net_shape = (768, 768)

# Input placeholder: rank-3 float32 tensor whose last dimension is 3.
img_input = tf.compat.v1.placeholder(tf.float32, shape=(None, None, 3))

# Evaluation pre-processing: resize to SSD net shape (WARP_RESIZE mode).
(image_pre, labels_pre, bboxes_pre,
 bbox_img, xs, ys) = ssd_vgg_preprocessing.preprocess_for_eval(
    img_input, None, None, None, None,
    net_shape, data_format,
    resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)

# Insert a leading axis of size 1, then cast to float32.
image_4d = tf.cast(tf.expand_dims(image_pre, 0), tf.float32)

# Define the txt_box model.
# `reuse` is True only when `txt_net` already exists in this namespace
# (i.e. this code has run before); otherwise it is None.
reuse = ('txt_net' in locals()) or None

txt_net = txtbox_384.TextboxNet()
print(txt_net.params.img_shape)
print('reuse:', reuse)

with slim.arg_scope(txt_net.arg_scope(data_format=data_format)):
    predictions, localisations, logits, end_points = txt_net.net(