def __init__(self, args):
    self.args = args

    with open(os.path.join(args.model_dir, args.log_name)) as the_log:
        log_data = json.load(the_log)[0]

    self.target_shape = Size._make(log_data['target_size'])
    self.image_size = Size._make(log_data['image_size'])

    # Step 1: build the network from the class names recorded in the train log
    localization_net_class_name, localization_module_name = self.get_class_and_module(log_data['localization_net'])
    module = self.load_module(os.path.abspath(os.path.join(args.model_dir, localization_module_name)))
    localization_net_class = getattr(module, localization_net_class_name)
    localization_net = self.build_localization_net(localization_net_class)

    recognition_net_class_name, recognition_module_name = self.get_class_and_module(log_data['recognition_net'])
    module = self.load_module(os.path.abspath(os.path.join(args.model_dir, recognition_module_name)))
    recognition_net_class = getattr(module, recognition_net_class_name)
    recognition_net = self.build_recognition_net(recognition_net_class)

    fusion_net_class_name, fusion_module_name = self.get_class_and_module(log_data['fusion_net'])
    module = self.load_module(os.path.abspath(os.path.join(args.model_dir, fusion_module_name)))
    fusion_net_class = getattr(module, fusion_net_class_name)
    self.net = self.build_fusion_net(fusion_net_class, localization_net, recognition_net)

    if args.gpu >= 0:
        self.net.to_gpu(args.gpu)

    # Step 2: load the trained weights
    with np.load(os.path.join(args.model_dir, args.snapshot_name)) as f:
        chainer.serializers.NpzDeserializer(f).load(self.net)

    # Step 3: open gt and prepare the evaluation bookkeeping
    with open(args.char_map) as the_map:
        self.char_map = json.load(the_map)

    self.xp = chainer.cuda.cupy if args.gpu >= 0 else np

    with open(args.eval_gt) as eval_gt:
        reader = csv.reader(eval_gt, delimiter='\t')
        self.lines = [l for l in reader]

    self.blank_symbol = args.blank_symbol
    self.num_correct_lines = 0
    self.num_correct_words = 0
    self.num_lines = 0
    self.num_words = 0
    self.num_word_x_correct = [0 for _ in range(args.timesteps)]
    self.num_word_x = [0 for _ in range(args.timesteps)]
    self.model_dir = args.model_dir
    self.metrics = self.create_metrics()
    self.save_rois = args.save_rois
    self.bbox_plotter = None
    if self.save_rois:
        self.create_bbox_plotter()
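# A minimal sketch of what the load_module helper used above could look like; the
# real helper is not part of this snippet, so treat the details as an assumption.
# It imports a python file by path, so the network classes named in the train log
# can then be resolved with getattr.
def load_module_sketch(module_path):
    import importlib.util
    import os
    module_name = os.path.splitext(os.path.basename(module_path))[0]
    spec = importlib.util.spec_from_file_location(module_name, module_path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module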
def __init__(self, gt_file, image_size, root='.', dtype=None, transform_probability=0,
             image_mode='L', keep_aspect_ratio=False, resize_after_load=True):
    _check_pillow_availability()
    assert isinstance(gt_file, six.string_types), "paths must be a file name!"
    assert os.path.splitext(gt_file)[-1] == ".json", "You have to supply gt information as json file!"

    if not isinstance(image_size, Size):
        image_size = Size(*image_size)

    with open(gt_file) as handle:
        self.gt_data = json.load(handle)

    self.root = root
    self.dtype = chainer.get_dtype(dtype)
    self.image_size = image_size
    self.transform_probability = transform_probability
    self.keep_aspect_ratio = keep_aspect_ratio
    self.resize_after_load = resize_after_load
    self.image_mode = image_mode
    self.augmentations = self.init_augmentations()
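# A minimal sketch of the _check_pillow_availability guard used above; the real
# helper is not shown in this snippet, so the exact error message is an assumption.
def _check_pillow_availability_sketch():
    try:
        import PIL  # noqa: F401
    except ImportError:
        raise ImportError("Pillow is needed for this dataset, please install it first")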
def draw_bbox(self, bboxes, output_size, target_size, draw, colour):
    bboxes = np.squeeze(bboxes)
    # sampling grid coordinates are in [-1, 1]; map them to [0, 1]
    bboxes = (bboxes + 1) / 2

    # an FSNS image consists of four views placed side by side
    single_image_target_size = Size(width=target_size.width // 4, height=target_size.height)
    bboxes[:, 0] *= single_image_target_size.width
    bboxes[:, 1] *= single_image_target_size.height

    for idx, bbox in enumerate(np.split(bboxes, len(bboxes), axis=0)):
        bbox = np.squeeze(bbox, axis=0)
        x = np.clip(bbox[0].reshape(output_size.height, output_size.width), 0, single_image_target_size.width)
        y = np.clip(bbox[1].reshape(output_size.height, output_size.width), 0, single_image_target_size.height)
        # shift the box into the sub-image it belongs to
        x += idx * single_image_target_size.width

        # the four corners of the sampling grid form the bbox polygon
        top_left = (x[0, 0], y[0, 0])
        top_right = (x[0, -1], y[0, -1])
        bottom_left = (x[-1, 0], y[-1, 0])
        bottom_right = (x[-1, -1], y[-1, -1])

        draw.polygon(
            [top_left, top_right, bottom_right, bottom_left],
            outline=colour,
        )
def create_sliding_window(self, image):
    loaded_size = Size(*image.shape[-2:])
    windows_at_width, width_overlap = self.get_num_windows_and_overlap(loaded_size.width, self.image_size.width)
    windows_at_height, height_overlap = self.get_num_windows_and_overlap(loaded_size.height, self.image_size.height)

    bboxes = []
    crops = []
    for j in range(windows_at_height):
        for i in range(windows_at_width):
            bbox = AxisAlignedBBox(
                left=i * self.image_size.width - i * width_overlap,
                top=j * self.image_size.height - j * height_overlap,
                right=(i + 1) * self.image_size.width - i * width_overlap,
                bottom=(j + 1) * self.image_size.height - j * height_overlap,
            )
            bboxes.append(bbox)

            window = image[:, bbox.top:bbox.bottom, bbox.left:bbox.right]
            if self.binarize_windows:
                window = self.binarize_image(window)
            crops.append(window)

    return numpy.stack(crops, axis=0), bboxes
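# A minimal sketch of what get_num_windows_and_overlap could compute; the real
# helper is not shown in this snippet, so the exact formula is an assumption.
# Idea: cover full_length with windows of window_length and distribute the slack
# as overlap between neighbouring windows (any rounding remainder just makes the
# last window slice run past the image border, which numpy truncates silently).
def get_num_windows_and_overlap_sketch(full_length, window_length):
    import math
    num_windows = max(int(math.ceil(full_length / window_length)), 1)
    if num_windows == 1:
        return 1, 0
    total_overlap = num_windows * window_length - full_length
    return num_windows, total_overlap // (num_windows - 1)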
def __init__(self, *args, **kwargs):
    # sliding-window evaluation works on the full image, so disable resizing and
    # random transforms in the base dataset
    kwargs['resize_after_load'] = False
    kwargs['transform_probability'] = 0

    image_size = kwargs.pop('image_size')
    if not isinstance(image_size, Size):
        image_size = Size(*image_size)

    self.max_size = kwargs.pop('max_size')
    self.image_size = image_size
    self.binarize_windows = kwargs.pop('binarize_windows', False)
    self.threshold_method = kwargs.pop('threshold_method', 'otsu')

    # forward the remaining arguments to the base dataset
    super().__init__(*args, image_size=image_size, **kwargs)
def resize_image(self, image):
    loaded_size = Size(*image.shape[-2:])
    # nothing to do if the image already fits below the maximum size while still
    # being larger than a single window
    if all(x < self.max_size for x in loaded_size) \
            and loaded_size.width > self.image_size.width \
            and loaded_size.height > self.image_size.height:
        return image

    # otherwise rescale so the longer side equals max_size, keeping the aspect
    # ratio, but never shrinking a side below the window size
    if loaded_size.width > loaded_size.height:
        aspect_ratio = loaded_size.height / loaded_size.width
        new_size = Size(
            height=max(int(self.max_size * aspect_ratio), self.image_size.height),
            width=self.max_size,
        )
    else:
        aspect_ratio = loaded_size.width / loaded_size.height
        new_size = Size(
            height=self.max_size,
            width=max(int(self.max_size * aspect_ratio), self.image_size.width),
        )

    # cv2.resize expects HWC layout
    image = image.transpose(1, 2, 0)
    image = cv2.resize(image, (new_size.width, new_size.height), interpolation=cv2.INTER_AREA)
    image = image.transpose(2, 0, 1)
    return image
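# A worked example of the resize rule above, reimplemented standalone so it can be
# checked in isolation; max_size=1000 and a 200 x 200 window are assumed values.
def resize_rule_sketch(width, height, max_size=1000, min_width=200, min_height=200):
    if width > height:
        return max_size, max(int(max_size * height / width), min_height)
    return max(int(max_size * width / height), min_width), max_size

assert resize_rule_sketch(4000, 2000) == (1000, 500)  # longer side clamped to max_size
assert resize_rule_sketch(1500, 100) == (1000, 200)   # height clamped to the window height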
def __init__(self, image, out_dir, out_size, loss_metrics, **kwargs):
    super(BBOXPlotter, self).__init__()
    self.image = image
    self.render_extracted_rois = kwargs.pop("render_extracted_rois", True)
    self.image_size = Size(height=image.shape[1], width=image.shape[2])
    self.out_dir = out_dir
    os.makedirs(self.out_dir, exist_ok=True)
    self.out_size = out_size
    self.colours = COLOR_MAP
    self.send_bboxes = kwargs.pop("send_bboxes", False)
    self.upstream_ip = kwargs.pop("upstream_ip", '127.0.0.1')
    self.upstream_port = kwargs.pop("upstream_port", 1337)
    self.loss_metrics = loss_metrics
    self.font = ImageFont.truetype("utils/DejaVuSans.ttf", 20)
    self.visualization_anchors = kwargs.pop("visualization_anchors", [])
    self.visual_backprop = VisualBackprop()
    self.xp = np
parser.add_argument("--load-recognition", action='store_true', default=False, help="only load recognition net") parser.add_argument("--is-trainer-snapshot", action='store_true', default=False, help="indicate that snapshot to load has been saved by trainer itself") parser.add_argument("--no-log", action='store_false', default=True, help="disable logging") parser.add_argument("--freeze-localization", action='store_true', default=False, help='freeze weights of localization net') parser.add_argument("--zoom", type=float, default=0.9, help="Zoom for initial bias of spatial transformer") parser.add_argument("--optimize-all-interval", type=int, default=5, help="interval in which to optimize the whole network instead of only a part") parser.add_argument("--use-dropout", action='store_true', default=False, help='use dropout in network') parser.add_argument("--test-image", help='path to an image that should be used by BBoxPlotter') parser = add_default_arguments(parser) args = parser.parse_args() args.is_original_fsns = True image_size = Size(width=150, height=150) target_shape = Size(width=100, height=75) # attributes that need to be adjusted, once the Curriculum decides to use # a more difficult dataset # this is a 'map' of attribute name to a path in the trainer object attributes_to_adjust = [ ('num_timesteps', ['predictor', 'localization_net']), ('num_timesteps', ['predictor', 'recognition_net']), ('num_timesteps', ['lossfun', '__self__']), ('num_labels', ['predictor', 'recognition_net']), ] # create train curriculum curriculum = BabyStepCurriculum( args.dataset_specification,
def save_extracted_regions(self, input, regions, transform_params, iteration, labels,
                           gt_bboxes=None, extra_transform_params=None, extra_interpolated_areas=None):
    sampling_grids = get_sampling_grid(transform_params, self.output_size)
    if extra_transform_params is not None:
        extra_size = Size(width=self.output_size.width // 2, height=self.output_size.height // 2)
        extra_sampling_grids = get_sampling_grid(extra_transform_params, extra_size)
    else:
        extra_sampling_grids = np.zeros_like(sampling_grids)
        extra_interpolated_areas = np.zeros_like(sampling_grids)

    # reserve extra space if the individual regions shall be pasted next to the image
    if self.plot_individual_regions or iteration % 1000 == 0:
        dest_image_size = (
            (len(regions) + 1) * self.image_size.width,
            self.image_size.height if not self.plot_extra_loc else 2 * self.image_size.height,
        )
    else:
        dest_image_size = (self.image_size.width, self.image_size.height)
    dest_image = Image.new('RGB', dest_image_size)

    image = self.create_image_from_array(input, resize=False)
    draw = ImageDraw.Draw(image)

    data_iter = zip(
        np.split(regions, len(regions)),
        np.split(sampling_grids, len(sampling_grids)),
        np.split(extra_sampling_grids, len(extra_sampling_grids)),
        np.split(extra_interpolated_areas, len(extra_interpolated_areas)),
        COLOR_MAP,
    )

    for idx, (region, bbox, extra_bbox, extra_region, colour) in enumerate(data_iter, start=1):
        # show each individual region if the user wants it or after every 1000 iterations
        # for debugging purposes
        if self.plot_individual_regions or iteration % 1000 == 0:
            region_image = self.create_image_from_array(region)
            if self.plot_extra_loc:
                region_draw = ImageDraw.Draw(region_image)
                self.draw_bbox(extra_bbox, extra_size, self.image_size, region_draw, colour)
                extra_region_image = self.create_image_from_array(extra_region)
                dest_image.paste(extra_region_image, (idx * self.image_size.width, self.image_size.height))
            dest_image.paste(region_image, (idx * self.image_size.width, 0))
        # draw bbox
        self.draw_bbox(bbox, self.output_size, self.image_size, draw, colour)

    if gt_bboxes is not None:
        for gt_bbox, colour in zip(np.split(gt_bboxes, len(gt_bboxes)), reversed(COLOR_MAP)):
            gt_bbox = np.squeeze(gt_bbox, axis=0)
            # gt_bbox layout is (x, width, y, height)
            top_left = (gt_bbox[0], gt_bbox[2])
            top_right = (gt_bbox[0] + gt_bbox[1], gt_bbox[2])
            bottom_left = (gt_bbox[0], gt_bbox[2] + gt_bbox[3])
            bottom_right = (gt_bbox[0] + gt_bbox[1], gt_bbox[2] + gt_bbox[3])
            draw.polygon(
                [top_left, top_right, bottom_right, bottom_left],
                outline=colour,
            )

    if self.show_labels:
        # only keep ascii characters
        # labels = ''.join(filter(lambda x: len(x) == len(x.encode()), labels))
        text_width, text_height = draw.textsize(labels, font=self.font)
        draw = ImageDraw.Draw(dest_image)
        draw.text((dest_image.width - text_width - 1, 0), labels, fill='green', font=self.font)

    dest_image.paste(image, (0, 0))

    if self.save_labels:
        file_name = "{}_{}.png".format(os.path.join(self.bbox_dir, str(iteration)), labels)
    else:
        file_name = "{}.png".format(os.path.join(self.bbox_dir, str(iteration)))
    dest_image.save(file_name)

    if self.send_bboxes:
        self.send_image(dest_image)
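# A minimal sketch of what get_sampling_grid could compute; the real helper is not
# shown in this snippet, so the parameter layout (N 2x3 affine matrices) and the
# returned shape are assumptions. It maps a regular grid over the output region
# into normalized [-1, 1] input coordinates, as in a spatial transformer.
def get_sampling_grid_sketch(transform_params, output_size):
    import numpy as np
    theta = np.asarray(transform_params).reshape(-1, 2, 3)
    ys, xs = np.meshgrid(
        np.linspace(-1, 1, output_size.height),
        np.linspace(-1, 1, output_size.width),
        indexing='ij',
    )
    # homogeneous coordinates of the output grid, shape (3, H*W)
    coords = np.stack([xs, ys, np.ones_like(xs)], axis=0).reshape(3, -1)
    grids = np.einsum('nij,jk->nik', theta, coords)  # (N, 2, H*W)
    return grids.reshape(-1, 2, output_size.height, output_size.width)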
                    action='store_true',
                    default=False,
                    help="indicate that you do not want to use the multi process iterator")
parser.add_argument("--refinement", action='store_true', default=False,
                    help='enable param refinement with IC-STN')
parser.add_argument("--render-all-bboxes", action='store_true', default=False,
                    help="bbox plotter also renders all intermediate bboxes")
parser = add_default_arguments(parser)

args = parser.parse_args()

image_size = Size(width=200, height=64)
target_shape = Size(width=50, height=50)

# attributes that need to be adjusted once the Curriculum decides to use a more
# difficult dataset; this is a 'map' of attribute name to a path in the trainer object
attributes_to_adjust = [
    ('num_timesteps', ['predictor', 'localization_net']),
    ('num_timesteps', ['predictor', 'recognition_net']),
    ('num_timesteps', ['lossfun', '__self__']),
    ('num_labels', ['predictor', 'recognition_net']),
]

curriculum = BabyStepCurriculum(
    args.dataset_specification,
    TextRecFileDataset,
    args.blank_label,
                    default=5,
                    help="number of timesteps the GRU shall run [default: 5]")
parser.add_argument("--alternative", action="store_true", default=False,
                    help="use alternative implementation of spatial transformers")
parser.add_argument("-ds", dest='downsample_factor', type=int, default=2,
                    help="downsample factor for the image sampler")
parser = add_default_arguments(parser)

args = parser.parse_args()

image_size = Size(width=200, height=200)

localization_net = MNISTLocalizationNet(args.dropout_ratio, args.timesteps)
recognition_net = MNISTRecognitionNet(
    image_size,
    args.dropout_ratio,
    downsample_factor=args.downsample_factor,
    use_alternative=args.alternative,
)
net = MNISTNet(localization_net, recognition_net)

model = Classifier(net, ('accuracy',), lossfun=mnist_loss, accfun=mnist_accuracy)

if args.gpu >= 0:
    cuda.get_device(args.gpu).use()
    model.to_gpu()
args = parser.parse_args()

# set standard args that should always hold true if using the supplied model
args.is_original_fsns = True
args.log_name = 'log'
args.dropout_ratio = 0.5
args.blank_symbol = 0
# max number of text regions in the image
args.timesteps = 4
# max number of characters per word
args.num_labels = 21

# open log and extract meta information
with open(os.path.join(args.model_dir, args.log_name)) as the_log:
    log_data = json.load(the_log)[0]

target_shape = Size._make(log_data['target_size'])
image_size = Size._make(log_data['image_size'])

xp = chainer.cuda.cupy if args.gpu >= 0 else np
network = create_network(args, log_data)

# load weights
with np.load(os.path.join(args.model_dir, args.snapshot_name)) as f:
    chainer.serializers.NpzDeserializer(f).load(network)

# load char map
with open(args.char_map) as the_map:
    char_map = json.load(the_map)

# load image
image = load_image(args.image_path, xp)
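# A minimal sketch of what the load_image helper used above could look like; the
# real helper is not shown in this snippet, so the exact preprocessing (colour
# mode, value scaling, extra batch axis) is an assumption.
def load_image_sketch(image_path, xp):
    import numpy
    from PIL import Image
    with Image.open(image_path) as the_image:
        image = numpy.asarray(the_image.convert('RGB'), dtype=numpy.float32) / 255
    image = image.transpose(2, 0, 1)  # HWC -> CHW, as chainer expects
    return xp.asarray(image)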
                    help='plot bbox predictions and extracted regions')
parser.add_argument('--save-predictions',
                    help='path to csv file where predictions shall be saved for later use')
args = parser.parse_args()

with open(args.char_map) as char_map_file:
    char_map = json.load(char_map_file)
reverse_char_map = {v: k for k, v in char_map.items()}

# FSNS images are 600 x 150 pixels and consist of four 150 x 150 views
args.input_height = 150
args.input_width = 600
data_shape = (1, 4, 3, args.input_height, args.input_width // 4)
input_size = Size(width=data_shape[-1], height=data_shape[-2])
output_size = Size(width=75, height=50)

model = get_model(args, data_shape, output_size)

bbox_plotter = None
if args.plot:
    bbox_plotter = FSNSBBOXPlotter(
        input_size,
        output_size,
        os.path.dirname(os.path.dirname(args.model_prefix)),
        bbox_dir='eval_bboxes',
        show_labels=True,
        label_map=args.char_map,
        blank_label=args.blank_label,
        plot_extra_loc=False,
if __name__ == '__main__':
    parser = utils.parse_args()
    args = parser.parse_args()

    if args.send_bboxes and args.ip is None:
        parser.print_usage()
        raise ValueError("You must specify an upstream ip if you want to send the bboxes of each iteration")

    time = datetime.datetime.now().isoformat()
    args.log_dir = os.path.join(args.log_dir, "{}_{}".format(time, args.log_name))
    args.log_file = os.path.join(args.log_dir, 'log')

    image_size = Size(width=600, height=150)
    source_shape = (args.batch_size, 4, 1, image_size.height, image_size.width)
    target_shape = Size(width=75, height=50)
    num_timesteps = 3
    labels_per_timestep = 10
    values_per_bbox = 0
    num_rnn_layers = 1
    label_width = num_timesteps * labels_per_timestep
    use_blstm = True
    save_attention = False

    eval_metric = mx.metric.CompositeEvalMetric(metrics=[
        STNAccuracy(make_label_time_major=True),
        STNCrossEntropy(make_label_time_major=True),
    ])
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    # FSNS images consist of four views placed side by side, so a single view is
    # a quarter of the full image width
    self.image_size = Size(height=self.image.shape[1], width=self.image.shape[2] // 4)
                    action='store_true',
                    default=False,
                    help='plot bbox predictions and extracted regions')
parser.add_argument('--save-predictions',
                    help='path to csv file where predictions shall be saved for later use')
args = parser.parse_args()

with open(args.char_map) as char_map_file:
    char_map = json.load(char_map_file)
reverse_char_map = {v: k for k, v in char_map.items()}

data_shape = (1, 1, args.input_height, args.input_width)
input_size = Size(width=data_shape[-1], height=data_shape[-2])
output_size = Size(width=40, height=40)

model = get_model(args, data_shape, output_size)

bbox_plotter = None
if args.plot:
    bbox_plotter = BBOXPlotter(
        input_size,
        output_size,
        os.path.dirname(os.path.dirname(args.model_prefix)),
        bbox_dir='eval_bboxes',
        show_labels=True,
        label_map=args.char_map,
        blank_label=args.blank_label,
    )