Example #1
    def __init__(self, args):
        self.args = args

        with open(os.path.join(args.model_dir, args.log_name)) as the_log:
            log_data = json.load(the_log)[0]

        self.target_shape = Size._make(log_data['target_size'])
        self.image_size = Size._make(log_data['image_size'])

        # Step 1: build network
        localization_net_class_name, localization_module_name = self.get_class_and_module(log_data['localization_net'])
        module = self.load_module(os.path.abspath(os.path.join(args.model_dir, localization_module_name)))
        # look the class up on the loaded module instead of using eval()
        localization_net_class = getattr(module, localization_net_class_name)
        localization_net = self.build_localization_net(localization_net_class)

        recognition_net_class_name, recognition_module_name = self.get_class_and_module(log_data['recognition_net'])
        module = self.load_module(os.path.abspath(os.path.join(args.model_dir, recognition_module_name)))
        recognition_net_class = getattr(module, recognition_net_class_name)
        recognition_net = self.build_recognition_net(recognition_net_class)

        fusion_net_class_name, fusion_module_name = self.get_class_and_module(log_data['fusion_net'])
        module = self.load_module(os.path.abspath(os.path.join(args.model_dir, fusion_module_name)))
        fusion_net_class = getattr(module, fusion_net_class_name)
        self.net = self.build_fusion_net(fusion_net_class, localization_net, recognition_net)

        if args.gpu >= 0:
            self.net.to_gpu(args.gpu)

        # Step 2: load weights
        with np.load(os.path.join(args.model_dir, args.snapshot_name)) as f:
            chainer.serializers.NpzDeserializer(f).load(self.net)

        # Step 3: open gt and do evaluation
        with open(args.char_map) as the_map:
            self.char_map = json.load(the_map)

        self.xp = chainer.cuda.cupy if args.gpu >= 0 else np

        with open(args.eval_gt) as eval_gt:
            reader = csv.reader(eval_gt, delimiter='\t')
            self.lines = list(reader)

        self.blank_symbol = args.blank_symbol

        self.num_correct_lines = 0
        self.num_correct_words = 0
        self.num_lines = 0
        self.num_words = 0
        self.num_word_x_correct = [0 for _ in range(args.timesteps)]
        self.num_word_x = [0 for _ in range(args.timesteps)]

        self.model_dir = args.model_dir

        self.metrics = self.create_metrics()

        self.save_rois = args.save_rois
        self.bbox_plotter = None
        if self.save_rois:
            self.create_bbox_plotter()
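
The `self.load_module` call above loads a network definition from a plain Python file shipped next to the model. A minimal sketch of how such a helper can be written with `importlib` (the helper name and paths here are illustrative, not the repo's actual implementation):

import importlib.util

def load_module(module_path):
    # load a Python source file as a fresh module object
    spec = importlib.util.spec_from_file_location('net_module', module_path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module

module = load_module('/path/to/model_dir/localization_net.py')
net_class = getattr(module, 'LocalizationNet')  # then instantiate as above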
Example #2
    def __init__(self,
                 gt_file,
                 image_size,
                 root='.',
                 dtype=None,
                 transform_probability=0,
                 image_mode='L',
                 keep_aspect_ratio=False,
                 resize_after_load=True):
        _check_pillow_availability()
        assert isinstance(gt_file, six.string_types), \
            "paths must be a file name!"
        assert os.path.splitext(gt_file)[-1] == ".json", \
            "You have to supply gt information as json file!"

        if not isinstance(image_size, Size):
            image_size = Size(*image_size)

        with open(gt_file) as handle:
            self.gt_data = json.load(handle)

        self.root = root
        self.dtype = chainer.get_dtype(dtype)
        self.image_size = image_size
        self.transform_probability = transform_probability
        self.keep_aspect_ratio = keep_aspect_ratio
        self.resize_after_load = resize_after_load
        self.image_mode = image_mode
        self.augmentations = self.init_augmentations()
Example #3
    def draw_bbox(self, bboxes, output_size, target_size, draw, colour):
        bboxes = np.squeeze(bboxes)
        bboxes = (bboxes + 1) / 2

        single_image_target_size = Size(width=target_size.width // 4,
                                        height=target_size.height)

        bboxes[:, 0] *= single_image_target_size.width
        bboxes[:, 1] *= single_image_target_size.height

        for idx, bbox in enumerate(np.split(bboxes, len(bboxes), axis=0)):
            bbox = np.squeeze(bbox, axis=0)

            x = np.clip(bbox[0].reshape(output_size.height, output_size.width),
                        0, single_image_target_size.width)
            y = np.clip(bbox[1].reshape(output_size.height, output_size.width),
                        0, single_image_target_size.height)

            x += idx * single_image_target_size.width

            top_left = (x[0, 0], y[0, 0])
            top_right = (x[0, -1], y[0, -1])
            bottom_left = (x[-1, 0], y[-1, 0])
            bottom_right = (x[-1, -1], y[-1, -1])

            draw.polygon(
                [top_left, top_right, bottom_right, bottom_left],
                outline=colour,
            )
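
The `(bboxes + 1) / 2` step above converts spatial-transformer sampling-grid coordinates from the [-1, 1] range into [0, 1] before scaling to pixel coordinates. A toy illustration of that mapping (values made up):

import numpy as np

grid_x = np.array([-1.0, 0.0, 1.0])   # normalized x coordinates
width = 100                           # assumed single-image target width
pixel_x = (grid_x + 1) / 2 * width
print(pixel_x)                        # [  0.  50. 100.]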
Example #4
    def create_sliding_window(self, image):
        loaded_size = Size(*image.shape[-2:])
        windows_at_width, width_overlap = self.get_num_windows_and_overlap(
            loaded_size.width, self.image_size.width)
        windows_at_height, height_overlap = self.get_num_windows_and_overlap(
            loaded_size.height, self.image_size.height)

        bboxes = []
        crops = []
        for j in range(windows_at_height):
            for i in range(windows_at_width):
                bbox = AxisAlignedBBox(
                    left=i * self.image_size.width - i * width_overlap,
                    top=j * self.image_size.height - j * height_overlap,
                    right=(i + 1) * self.image_size.width - i * width_overlap,
                    bottom=(j + 1) * self.image_size.height -
                    j * height_overlap,
                )
                bboxes.append(bbox)
                window = image[:, bbox.top:bbox.bottom, bbox.left:bbox.right]
                if self.binarize_windows:
                    window = self.binarize_image(window)
                crops.append(window)

        return numpy.stack(crops, axis=0), bboxes
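
`get_num_windows_and_overlap` is not shown in this snippet; a plausible implementation that covers a side of length `full_length` with overlapping windows could look like the following (a sketch under that assumption, not necessarily the repo's version):

import math

def get_num_windows_and_overlap(full_length, window_length):
    # number of windows needed to cover the full side
    num_windows = math.ceil(full_length / window_length)
    if num_windows <= 1:
        return 1, 0
    # distribute the slack evenly as overlap between neighbouring windows
    total_overlap = num_windows * window_length - full_length
    return num_windows, total_overlap // (num_windows - 1)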
Example #5
    def __init__(self, *args, **kwargs):
        kwargs['resize_after_load'] = False
        kwargs['transform_probability'] = 0

        image_size = kwargs.pop('image_size')
        if not isinstance(image_size, Size):
            image_size = Size(*image_size)

        self.max_size = kwargs.pop('max_size')
        self.image_size = image_size
        self.binarize_windows = kwargs.pop('binarize_windows', False)
        self.threshold_method = kwargs.pop('threshold_method', 'otsu')

        # forward the adjusted args to the parent dataset class; calling
        # super().__init__() with no arguments would silently drop them
        super().__init__(*args, **kwargs)
Example #6
    def resize_image(self, image):
        loaded_size = Size(*image.shape[-2:])

        fits_max_size = all(x < self.max_size for x in loaded_size)
        larger_than_window = (loaded_size.width > self.image_size.width
                              and loaded_size.height > self.image_size.height)
        if fits_max_size and larger_than_window:
            return image

        if loaded_size.width > loaded_size.height:
            aspect_ratio = loaded_size.height / loaded_size.width
            new_size = Size(height=max(int(self.max_size * aspect_ratio),
                                       self.image_size.height),
                            width=self.max_size)
        else:
            aspect_ratio = loaded_size.width / loaded_size.height
            new_size = Size(height=self.max_size,
                            width=max(int(self.max_size * aspect_ratio),
                                      self.image_size.width))

        image = image.transpose(1, 2, 0)
        image = cv2.resize(image, (new_size.width, new_size.height),
                           interpolation=cv2.INTER_AREA)
        image = image.transpose(2, 0, 1)
        return image
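
As a worked example of the branch above, assuming `Size = namedtuple('Size', ['height', 'width'])` (consistent with `Size(*image.shape[-2:])`) and illustrative numbers:

from collections import namedtuple

Size = namedtuple('Size', ['height', 'width'])

max_size = 600                                # illustrative cap on the long edge
min_target = Size(height=64, width=200)       # stands in for self.image_size

loaded = Size(height=400, width=1200)         # wider than tall
aspect_ratio = loaded.height / loaded.width   # 400 / 1200 = 1/3
new_size = Size(height=max(int(max_size * aspect_ratio), min_target.height),
                width=max_size)
print(new_size)                               # Size(height=200, width=600)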
Example #7
    def __init__(self, image, out_dir, out_size, loss_metrics, **kwargs):
        super(BBOXPlotter, self).__init__()
        self.image = image
        self.render_extracted_rois = kwargs.pop("render_extracted_rois", True)
        self.image_size = Size(height=image.shape[1], width=image.shape[2])
        self.out_dir = out_dir
        os.makedirs(self.out_dir, exist_ok=True)
        self.out_size = out_size
        self.colours = COLOR_MAP
        self.send_bboxes = kwargs.pop("send_bboxes", False)
        self.upstream_ip = kwargs.pop("upstream_ip", '127.0.0.1')
        self.upstream_port = kwargs.pop("upstream_port", 1337)
        self.loss_metrics = loss_metrics
        self.font = ImageFont.truetype("utils/DejaVuSans.ttf", 20)
        self.visualization_anchors = kwargs.pop("visualization_anchors", [])
        self.visual_backprop = VisualBackprop()
        self.xp = np
    parser.add_argument("--load-recognition", action='store_true', default=False, help="only load recognition net")
    parser.add_argument("--is-trainer-snapshot", action='store_true', default=False,
                        help="indicate that snapshot to load has been saved by trainer itself")
    parser.add_argument("--no-log", action='store_false', default=True, help="disable logging")
    parser.add_argument("--freeze-localization", action='store_true', default=False,
                        help='freeze weights of localization net')
    parser.add_argument("--zoom", type=float, default=0.9, help="Zoom for initial bias of spatial transformer")
    parser.add_argument("--optimize-all-interval", type=int, default=5,
                        help="interval in which to optimize the whole network instead of only a part")
    parser.add_argument("--use-dropout", action='store_true', default=False, help='use dropout in network')
    parser.add_argument("--test-image", help='path to an image that should be used by BBoxPlotter')
    parser = add_default_arguments(parser)
    args = parser.parse_args()
    args.is_original_fsns = True

    image_size = Size(width=150, height=150)
    target_shape = Size(width=100, height=75)

    # attributes that need to be adjusted once the Curriculum decides to use
    # a more difficult dataset
    # this is a 'map' of attribute name to a path in the trainer object
    attributes_to_adjust = [
        ('num_timesteps', ['predictor', 'localization_net']),
        ('num_timesteps', ['predictor', 'recognition_net']),
        ('num_timesteps', ['lossfun', '__self__']),
        ('num_labels', ['predictor', 'recognition_net']),
    ]

    # create train curriculum
    curriculum = BabyStepCurriculum(
        args.dataset_specification,
Example #9
    def save_extracted_regions(self,
                               input,
                               regions,
                               transform_params,
                               iteration,
                               labels,
                               gt_bboxes=None,
                               extra_transform_params=None,
                               extra_interpolated_areas=None):
        sampling_grids = get_sampling_grid(transform_params, self.output_size)

        if extra_transform_params is not None:
            extra_size = Size(width=self.output_size.width // 2,
                              height=self.output_size.height // 2)
            extra_sampling_grids = get_sampling_grid(
                extra_transform_params,
                extra_size,
            )
        else:
            extra_sampling_grids = np.zeros_like(sampling_grids)
            extra_interpolated_areas = np.zeros_like(sampling_grids)

        if self.plot_individual_regions or iteration % 1000 == 0:
            dest_image_size = ((len(regions) + 1) * self.image_size.width,
                               self.image_size.height
                               if not self.plot_extra_loc else 2 *
                               self.image_size.height)
        else:
            dest_image_size = (self.image_size.width, self.image_size.height)
        dest_image = Image.new('RGB', dest_image_size)

        image = self.create_image_from_array(input, resize=False)
        draw = ImageDraw.Draw(image)

        data_iter = zip(
            np.split(regions, len(regions)),
            np.split(sampling_grids, len(sampling_grids)),
            np.split(extra_sampling_grids, len(extra_sampling_grids)),
            np.split(extra_interpolated_areas, len(extra_interpolated_areas)),
            COLOR_MAP,
        )

        for idx, (region, bbox, extra_bbox, extra_region,
                  colour) in enumerate(data_iter, start=1):
            # show each individual region if the user wants it or after every 1000 iterations for debugging purposes
            if self.plot_individual_regions or iteration % 1000 == 0:
                region_image = self.create_image_from_array(region)

                if self.plot_extra_loc:
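                    # note: `extra_size` is only defined when
                    # extra_transform_params is not None; plot_extra_loc is
                    # assumed to imply that combination here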
                    region_draw = ImageDraw.Draw(region_image)
                    self.draw_bbox(extra_bbox, extra_size, self.image_size,
                                   region_draw, colour)
                    extra_region_image = self.create_image_from_array(
                        extra_region)
                    dest_image.paste(
                        extra_region_image,
                        (idx * self.image_size.width, self.image_size.height))

                dest_image.paste(region_image,
                                 (idx * self.image_size.width, 0))

            # draw bbox
            self.draw_bbox(bbox, self.output_size, self.image_size, draw,
                           colour)

        if gt_bboxes is not None:
            for gt_bbox, colour in zip(np.split(gt_bboxes, len(gt_bboxes)),
                                       reversed(COLOR_MAP)):
                gt_bbox = np.squeeze(gt_bbox, axis=0)
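                # the corner maths below implies a [x, width, y, height]
                # layout for each gt_bbox (inferred from this code)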
                top_left = (gt_bbox[0], gt_bbox[2])
                top_right = (gt_bbox[0] + gt_bbox[1], gt_bbox[2])
                bottom_left = (gt_bbox[0], gt_bbox[2] + gt_bbox[3])
                bottom_right = (gt_bbox[0] + gt_bbox[1],
                                gt_bbox[2] + gt_bbox[3])

                draw.polygon(
                    [top_left, top_right, bottom_right, bottom_left],
                    outline=colour,
                )

        if self.show_labels:
            # only keep ascii characters
            # labels = ''.join(filter(lambda x: len(x) == len(x.encode()), labels))
            # draw.textsize was removed in Pillow 10; on newer Pillow use
            # draw.textbbox to measure the text instead
            text_width, text_height = draw.textsize(labels, font=self.font)
            draw = ImageDraw.Draw(dest_image)
            draw.text((dest_image.width - text_width - 1, 0),
                      labels,
                      fill='green',
                      font=self.font)

        dest_image.paste(image, (0, 0))
        if self.save_labels:
            file_name = "{}_{}.png".format(
                os.path.join(self.bbox_dir, str(iteration)), labels)
        else:
            file_name = "{}.png".format(
                os.path.join(self.bbox_dir, str(iteration)))
        dest_image.save(file_name)

        if self.send_bboxes:
            self.send_image(dest_image)
Example #10
        action='store_true',
        default=False,
        help="indicate that you do not want to use the multi process iterator")
    parser.add_argument("--refinement",
                        action='store_true',
                        default=False,
                        help='enable param refinement with IC-STN')
    parser.add_argument(
        "--render-all-bboxes",
        action='store_true',
        default=False,
        help="bbox plotter also renders all intermediate bboxes")
    parser = add_default_arguments(parser)
    args = parser.parse_args()

    image_size = Size(width=200, height=64)
    target_shape = Size(width=50, height=50)

    # attributes that need to be adjusted once the Curriculum decides to use
    # a more difficult dataset
    # this is a 'map' of attribute name to a path in the trainer object
    attributes_to_adjust = [
        ('num_timesteps', ['predictor', 'localization_net']),
        ('num_timesteps', ['predictor', 'recognition_net']),
        ('num_timesteps', ['lossfun', '__self__']),
        ('num_labels', ['predictor', 'recognition_net']),
    ]

    curriculum = BabyStepCurriculum(args.dataset_specification,
                                    TextRecFileDataset,
                                    args.blank_label,
Example #11
        default=5,
        help="number of timesteps the GRU shall run [default: 5]")
    parser.add_argument(
        "--alternative",
        action="store_true",
        default=False,
        help="use alternative implementation of spatial Transformers")
    parser.add_argument("-ds",
                        dest='downsample_factor',
                        type=int,
                        default=2,
                        help="downsample for image sampler")
    parser = add_default_arguments(parser)
    args = parser.parse_args()

    image_size = Size(width=200, height=200)

    localization_net = MNISTLocalizationNet(args.dropout_ratio, args.timesteps)
    recognition_net = MNISTRecognitionNet(
        image_size,
        args.dropout_ratio,
        downsample_factor=args.downsample_factor,
        use_alternative=args.alternative)
    net = MNISTNet(localization_net, recognition_net)

    model = Classifier(net, ('accuracy', ),
                       lossfun=mnist_loss,
                       accfun=mnist_accuracy)
    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()
Example #12
    args = parser.parse_args()
    # set standard args that should always hold true if using the supplied model
    args.is_original_fsns = True
    args.log_name = 'log'
    args.dropout_ratio = 0.5
    args.blank_symbol = 0
    # max number of text regions in the image
    args.timesteps = 4
    # max number of characters per word
    args.num_labels = 21

    # open log and extract meta information
    with open(os.path.join(args.model_dir, args.log_name)) as the_log:
        log_data = json.load(the_log)[0]

    target_shape = Size._make(log_data['target_size'])
    image_size = Size._make(log_data['image_size'])

    xp = chainer.cuda.cupy if args.gpu >= 0 else np
    network = create_network(args, log_data)

    # load weights
    with np.load(os.path.join(args.model_dir, args.snapshot_name)) as f:
        chainer.serializers.NpzDeserializer(f).load(network)

    # load char map
    with open(args.char_map) as the_map:
        char_map = json.load(the_map)

    # load image
    image = load_image(args.image_path, xp)
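
`load_image` is not shown here; a plausible version that reads the image with Pillow and hands it to numpy or cupy via `xp` might look like this (an assumption, names illustrative):

import numpy as np
from PIL import Image

def load_image(image_path, xp):
    # RGB, float32 in [0, 1], channel-first, on the requested array module
    with Image.open(image_path) as the_image:
        image = np.asarray(the_image.convert('RGB'), dtype=np.float32) / 255.0
    return xp.asarray(image.transpose(2, 0, 1))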
Example #13
                        help='plot bbox predictions and extracted regions')
    parser.add_argument(
        '--save-predictions',
        help='path to csv file where predictions shall be saved for later use')

    args = parser.parse_args()

    with open(args.char_map) as char_map_file:
        char_map = json.load(char_map_file)

    reverse_char_map = {v: k for k, v in char_map.items()}

    args.input_height = 150
    args.input_width = 600
    data_shape = (1, 4, 3, args.input_height, args.input_width // 4)
    input_size = Size(width=data_shape[-1], height=data_shape[-2])
    output_size = Size(width=75, height=50)

    model = get_model(args, data_shape, output_size)

    bbox_plotter = None
    if args.plot:
        bbox_plotter = FSNSBBOXPlotter(
            input_size,
            output_size,
            os.path.dirname(os.path.dirname(args.model_prefix)),
            bbox_dir='eval_bboxes',
            show_labels=True,
            label_map=args.char_map,
            blank_label=args.blank_label,
            plot_extra_loc=False,
Example #14
if __name__ == '__main__':
    parser = utils.parse_args()
    args = parser.parse_args()

    if args.send_bboxes and args.ip is None:
        parser.print_usage()
        raise ValueError(
            "You must specify an upstream ip if you want to send the bboxes of each iteration"
        )

    time = datetime.datetime.now().isoformat()
    args.log_dir = os.path.join(args.log_dir,
                                "{}_{}".format(time, args.log_name))
    args.log_file = os.path.join(args.log_dir, 'log')

    image_size = Size(width=600, height=150)
    source_shape = (args.batch_size, 4, 1, image_size.height, image_size.width)
    target_shape = Size(width=75, height=50)
    num_timesteps = 3
    labels_per_timestep = 10
    values_per_bbox = 0
    num_rnn_layers = 1
    label_width = num_timesteps * labels_per_timestep
    use_blstm = True
    save_attention = False

    eval_metric = mx.metric.CompositeEvalMetric(metrics=[
        STNAccuracy(make_label_time_major=True),
        STNCrossEntropy(make_label_time_major=True),
    ])
Example #15
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.image_size = Size(height=self.image.shape[1],
                               width=self.image.shape[2] // 4)
Example #16
                        action='store_true',
                        default=False,
                        help='plot bbox predictions and extracted regions')
    parser.add_argument(
        '--save-predictions',
        help='path to csv file where predictions shall be saved for later use')

    args = parser.parse_args()

    with open(args.char_map) as char_map_file:
        char_map = json.load(char_map_file)

    reverse_char_map = {v: k for k, v in char_map.items()}

    data_shape = (1, 1, args.input_height, args.input_width)
    input_size = Size(width=data_shape[-1], height=data_shape[-2])
    output_size = Size(width=40, height=40)

    model = get_model(args, data_shape, output_size)

    bbox_plotter = None
    if args.plot:
        bbox_plotter = BBOXPlotter(
            input_size,
            output_size,
            os.path.dirname(os.path.dirname(args.model_prefix)),
            bbox_dir='eval_bboxes',
            show_labels=True,
            label_map=args.char_map,
            blank_label=args.blank_label,
        )