示例#1
0
    def __init__(self, args):
        """Build the network described by the training log, load its weights and prepare evaluation state.

        Args:
            args: parsed options object. The attributes read here are
                ``model_dir``, ``log_name``, ``gpu``, ``snapshot_name``,
                ``char_map``, ``eval_gt``, ``blank_symbol``, ``timesteps``
                and ``save_rois``.

        Raises:
            AttributeError: if a class named in the log does not exist in the
                module that was loaded for it.
        """
        self.args = args

        # only the first entry of the JSON log is used as the source of meta information
        with open(os.path.join(args.model_dir, args.log_name)) as the_log:
            log_data = json.load(the_log)[0]

        self.target_shape = Size._make(log_data['target_size'])
        self.image_size = Size._make(log_data['image_size'])

        def load_net_class(log_entry):
            # Resolve a log entry to the class object it names, loading the defining module
            # from the model directory.
            class_name, module_name = self.get_class_and_module(log_entry)
            module = self.load_module(os.path.abspath(os.path.join(args.model_dir, module_name)))
            # Plain attribute lookup instead of eval(): the name comes from a file on disk and
            # must never be executed as code. Dotted names are still resolved step by step.
            net_class = module
            for attribute in class_name.split('.'):
                net_class = getattr(net_class, attribute)
            return net_class

        # Step 1: build network
        localization_net = self.build_localization_net(load_net_class(log_data['localization_net']))
        recognition_net = self.build_recognition_net(load_net_class(log_data['recognition_net']))
        self.net = self.build_fusion_net(
            load_net_class(log_data['fusion_net']),
            localization_net,
            recognition_net,
        )

        if args.gpu >= 0:
            self.net.to_gpu(args.gpu)

        # Step 2: load weights
        with np.load(os.path.join(args.model_dir, args.snapshot_name)) as f:
            chainer.serializers.NpzDeserializer(f).load(self.net)

        # Step 3: load the char map and the ground truth used for evaluation
        with open(args.char_map) as the_map:
            self.char_map = json.load(the_map)

        # array module matching the device selected above
        self.xp = chainer.cuda.cupy if args.gpu >= 0 else np

        # newline='' lets the csv module do its own line-ending handling (see the csv docs)
        with open(args.eval_gt, newline='') as eval_gt:
            reader = csv.reader(eval_gt, delimiter='\t')
            self.lines = list(reader)

        self.blank_symbol = args.blank_symbol

        # evaluation counters; the two lists hold one slot per timestep
        self.num_correct_lines = 0
        self.num_correct_words = 0
        self.num_lines = 0
        self.num_words = 0
        self.num_word_x_correct = [0] * args.timesteps
        self.num_word_x = [0] * args.timesteps

        self.model_dir = args.model_dir

        self.metrics = self.create_metrics()

        # the bbox plotter is only created when ROIs are to be saved
        self.save_rois = args.save_rois
        self.bbox_plotter = None
        if self.save_rois:
            self.create_bbox_plotter()
示例#2
0
    # Parse the command line options.
    args = parser.parse_args()
    # set standard args that should always hold true if using the supplied model
    args.is_original_fsns = True
    args.log_name = 'log'
    args.dropout_ratio = 0.5
    args.blank_symbol = 0
    # max number of text regions in the image
    args.timesteps = 4
    # max number of characters per word
    args.num_labels = 21

    # open log and extract meta information
    # (only the first element of the decoded JSON document is used)
    with open(os.path.join(args.model_dir, args.log_name)) as the_log:
        log_data = json.load(the_log)[0]

    # sizes as recorded in the log under 'target_size' and 'image_size'
    target_shape = Size._make(log_data['target_size'])
    image_size = Size._make(log_data['image_size'])

    # array module: cupy when a GPU id (>= 0) was given, numpy otherwise
    xp = chainer.cuda.cupy if args.gpu >= 0 else np
    network = create_network(args, log_data)

    # load weights from the snapshot file inside the model directory
    with np.load(os.path.join(args.model_dir, args.snapshot_name)) as f:
        chainer.serializers.NpzDeserializer(f).load(network)

    # load char map (a JSON file)
    with open(args.char_map) as the_map:
        char_map = json.load(the_map)

    # load image
    # NOTE(review): load_image is defined elsewhere; presumably it returns an array created with xp - confirm
    image = load_image(args.image_path, xp)