def test_rotation(opt):
    """Run the loaded detector(s) over a folder of images and save the boxes.

    For every prefix in ``opt.model_prefix_list`` an SSD network is built,
    its latest checkpoint is loaded and the network is switched to half
    precision.  Every ``*.<opt.extension>`` image found under
    ``opt.test_dataset_root`` is resized so that its longer side equals
    ``opt.test_size``, padded to a square, run through every network, and the
    merged boxes are mapped back to the original image.  Per image this writes

    * ``<args.path>/<args.code_name>/result+<prefixes>/<image stem>.txt`` with
      one ``x1,y1,x2,y1,x2,y2,x1,y2`` line per box, and
    * ``<args.path>/<args.code_name>/val+<prefixes>/<index>.jpg`` with the
      boxes drawn on the image.

    NOTE(review): the visible part of this file defines ``test_rotation``
    twice; the later definition appears to replace this one at import time.

    Args:
        opt: option namespace; the attributes read here are
            ``model_prefix_list``, ``device_id``, ``nth_best_model``,
            ``detector_top_k``, ``detector_conf_threshold``,
            ``detector_nms_threshold``, ``test_dataset_root``, ``extension``
            and ``test_size``.

    Raises:
        AssertionError: more model prefixes than visible CUDA devices.
        RuntimeError: a checkpoint tensor has a different shape than the
            matching model parameter.
        FileNotFoundError: ``opt.test_dataset_root`` does not exist.
    """
    run_tag = "-".join(opt.model_prefix_list)
    result_dir = os.path.join(args.path, args.code_name, "result+" + run_tag)
    os.makedirs(result_dir, exist_ok=True)

    # Load
    assert len(opt.model_prefix_list) <= torch.cuda.device_count(), \
        "number of models should not exceed the device numbers"
    single_model = len(opt.model_prefix_list) == 1
    nets = []
    for net_idx, prefix in enumerate(opt.model_prefix_list):
        net = model.SSD(cfg, connect_loc_to_conf=True, fix_size=False,
                        incep_conf=True, incep_loc=True)
        # One model: use the requested device.  Several models: one per GPU.
        device_id = opt.device_id if single_model else net_idx
        net = net.to("cuda:%d" % (device_id))
        net_dict = net.state_dict()
        weight_dict = util.load_latest_model(args, net, prefix=prefix,
                                             return_state_dict=True,
                                             nth=opt.nth_best_model)
        loading_fail_signal = False
        # Parameters that the checkpoint does not provide are zero-filled.
        for key in net_dict:
            if "module." + key not in weight_dict:
                net_dict[key] = torch.zeros(net_dict[key].shape)
        for key, weight in weight_dict.items():
            # Drop the first 7 characters; the lookup above shows checkpoint
            # keys are expected to carry a "module." prefix.
            bare_key = key[7:]
            if bare_key not in net_dict:
                print("Key: %s does not exist in net_dict" % (bare_key))
            elif net_dict[bare_key].shape == weight.shape:
                net_dict[bare_key] = weight
            else:
                print(
                    "Key: %s from disk has shape %s copy to the model with shape %s"
                    % (bare_key, str(weight.shape),
                       str(net_dict[bare_key].shape)))
                loading_fail_signal = True
        if loading_fail_signal:
            raise RuntimeError(
                'Shape Error happens, remove "%s" from your -mpl settings.' %
                (prefix))
        net.load_state_dict(net_dict)
        net.eval()
        nets.append(net.half())
        print("Above model loaded with out a problem")
    detector = model.Detect(num_classes=2, bkg_label=0,
                            top_k=opt.detector_top_k,
                            conf_thresh=opt.detector_conf_threshold,
                            nms_thresh=opt.detector_nms_threshold)

    # Enumerate test folder
    root_path = os.path.expanduser(opt.test_dataset_root)
    if not os.path.exists(root_path):
        raise FileNotFoundError(
            "%s does not exists, please check your -tdr/--test_dataset_root settings"
            % (root_path))
    img_list = glob.glob(os.path.join(root_path, "*.%s" % (opt.extension)))

    # The output folder does not depend on the image, so create it once.
    img_save_directory = os.path.join(args.path, args.code_name,
                                      "val+" + run_tag)
    os.makedirs(img_save_directory, exist_ok=True)

    for i, img_file in enumerate(sorted(img_list)):
        start = time.time()
        # Strip directory and extension whatever the extension length is
        # (a fixed "[:-4]" slice only works for three-letter extensions).
        name = os.path.splitext(os.path.basename(img_file))[0]
        img = cv2.imread(img_file)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            print("Skip %s: cv2.imread could not read it" % (img_file))
            continue
        height_ori, width_ori = img.shape[0], img.shape[1]

        # detect rotation for returning the image back
        transform_det = {"rotation": 0}

        # Resize the longer side to a certain length
        if height_ori >= width_ori:
            resize_spec = {"height": opt.test_size,
                           "width": "keep-aspect-ratio"}
        else:
            resize_spec = {"height": "keep-aspect-ratio",
                           "width": opt.test_size}
        resize_aug = augmenters.Sequential(
            [augmenters.Resize(size=resize_spec)])
        resize_aug = resize_aug.to_deterministic()
        image = resize_aug.augment_image(img)
        h_re, w_re = image.shape[0], image.shape[1]

        # Pad the image into a square image
        pad_aug = augmenters.Sequential(
            augmenters.PadToFixedSize(width=opt.test_size,
                                      height=opt.test_size,
                                      pad_cval=255, position="center"))
        pad_aug = pad_aug.to_deterministic()
        image = pad_aug.augment_image(image)
        h_final, w_final = image.shape[0], image.shape[1]

        # Prepare image tensor and test
        image_t = torch.Tensor(util.normalize_image(args, image)).unsqueeze(0)
        image_t = image_t.permute(0, 3, 1, 2)

        text_boxes = []
        # Pure inference: no autograd graph is needed.
        with torch.no_grad():
            for net_idx, net in enumerate(nets):
                device_id = opt.device_id if len(nets) == 1 else net_idx
                image_t = image_t.to("cuda:%d" % (device_id)).half()
                out = net(image_t, is_train=False)
                loc_data, conf_data, prior_data = out
                prior_data = prior_data.to("cuda:%d" % (device_id))
                det_result = detector(loc_data, conf_data, prior_data)
                # Extract the predicted bboxes
                idx = det_result.data[0, 1, :, 0] >= 0.1
                text_boxes.append(det_result.data[0, 1, idx, 1:])
        # With several nets the results may sit on different GPUs, and
        # torch.cat needs a single device; gather them on the first one.
        gather_device = text_boxes[0].device
        text_boxes = torch.cat(
            [boxes.to(gather_device) for boxes in text_boxes], dim=0)
        text_boxes = combine_boxes(text_boxes, img=image_t)
        pred = [[float(coor) for coor in area] for area in text_boxes]

        # Box coordinates are scaled by the padded image size before being
        # mapped back to the original image.
        BBox = [
            imgaug.augmentables.bbs.BoundingBox(box[0] * w_final,
                                                box[1] * h_final,
                                                box[2] * w_final,
                                                box[3] * h_final)
            for box in pred
        ]
        BBoxes = imgaug.augmentables.bbs.BoundingBoxesOnImage(
            BBox, shape=image.shape)
        return_aug = augment_back(transform_det, height_ori, width_ori,
                                  (h_final - h_re) / 2, (w_final - w_re) / 2)
        return_aug = return_aug.to_deterministic()
        # NOTE(review): the restored image is not used afterwards; the call is
        # kept so the augmenter is exercised exactly as before.
        img_ori = return_aug.augment_image(image)
        bbox = return_aug.augment_bounding_boxes([BBoxes])[0]

        pred_final = []
        # The context manager closes the file even if drawing fails.
        with open(os.path.join(result_dir, name + ".txt"), "w") as f:
            for box in bbox.bounding_boxes:
                x1, y1 = int(round(box.x1)), int(round(box.y1))
                x2, y2 = int(round(box.x2)), int(round(box.y2))
                pred_final.append([x1, y1, x2, y2])
                # 4-point to 8-point: x1, y1, x2, y1, x2, y2, x1, y2
                f.write("%d,%d,%d,%d,%d,%d,%d,%d\n" %
                        (x1, y1, x2, y1, x2, y2, x1, y2))
                cv2.rectangle(img, (x1, y1), (x2, y2), (255, 105, 65), 2)

        # Save a preview scaled to a width of opt.test_size.
        preview_h = img.shape[0] * opt.test_size / img.shape[1]
        img = cv2.resize(img, (opt.test_size, int(preview_h)))
        cv2.imwrite(os.path.join(img_save_directory, "%04d.jpg" % i), img)
        print("%d th image cost %.2f seconds" % (i, time.time() - start))
def test_rotation(opt):
    """Evaluate the loaded detector(s) on a folder of images with ground truth.

    For every prefix in ``opt.model_prefix_list`` an SSD network is built from
    the architecture switches on ``opt`` and its latest checkpoint is loaded.
    Every ``*.<opt.extension>`` image under ``opt.test_dataset_root`` is
    resized so that its longer side equals ``square``, padded to a square,
    run through every network, and the merged boxes are mapped back to the
    original image.  Per image this

    * writes ``<args.path>/<args.code_name>/result+<prefixes>/<stem>.txt``
      with one ``x1,y1,x2,y1,x2,y2,x1,y2`` line per predicted box,
    * reads the ground truth from ``<stem>.<opt.ground_truth_extension>`` in
      the test folder and scores the prediction with ``measure``,
    * saves ``<args.path>/<args.code_name>/val+<prefixes>/
      <index>_<precision>_<recall>.jpg`` with both box sets drawn.

    The values returned by ``avg`` for precision and recall are printed at
    the end.

    NOTE(review): ``square`` is not defined inside this function; presumably
    it is a module-level target size - confirm.

    Args:
        opt: option namespace; the attributes read here are
            ``model_prefix_list`` (a list, or a single string), ``device_id``,
            ``nth_best_model``, the SSD switches (``loc_to_conf``,
            ``conf_incep``, ``loc_incep``, ``loc_preconv``, ``conf_preconv``,
            ``feature_pyramid_net``, ``self_attention``, ``inner_filters``,
            ``inner_m_factor``), the detector thresholds,
            ``test_dataset_root``, ``extension``, ``ground_truth_extension``
            and ``verbose``.

    Raises:
        AssertionError: more model prefixes than visible CUDA devices.
        RuntimeError: a checkpoint tensor has a different shape than the
            matching model parameter.
        FileNotFoundError: ``opt.test_dataset_root`` does not exist.
    """
    # Normalise a single prefix to a list BEFORE building any path from it;
    # "-".join() on a bare string would otherwise put a dash between every
    # character of the prefix.
    if isinstance(opt.model_prefix_list, str):
        opt.model_prefix_list = [opt.model_prefix_list]
    run_tag = "-".join(opt.model_prefix_list)
    result_dir = os.path.join(args.path, args.code_name, "result+" + run_tag)
    os.makedirs(result_dir, exist_ok=True)

    # Load
    assert len(opt.model_prefix_list) <= torch.cuda.device_count(), \
        "number of models should not exceed the device numbers"
    single_model = len(opt.model_prefix_list) == 1
    nets = []
    for net_idx, prefix in enumerate(opt.model_prefix_list):
        net = model.SSD(cfg, connect_loc_to_conf=opt.loc_to_conf,
                        fix_size=False,
                        conf_incep=opt.conf_incep, loc_incep=opt.loc_incep,
                        loc_preconv=opt.loc_preconv,
                        conf_preconv=opt.conf_preconv,
                        FPN=opt.feature_pyramid_net, SA=opt.self_attention,
                        in_wid=opt.inner_filters,
                        m_factor=opt.inner_m_factor)
        # One model: use the requested device.  Several models: one per GPU.
        device_id = opt.device_id if single_model else net_idx
        net = net.to("cuda:%d" % (device_id))
        net_dict = net.state_dict()
        weight_dict = util.load_latest_model(args, net, prefix=prefix,
                                             return_state_dict=True,
                                             nth=opt.nth_best_model)
        loading_fail_signal = False
        # Parameters that the checkpoint does not provide are zero-filled.
        for key in net_dict:
            if "module." + key not in weight_dict:
                net_dict[key] = torch.zeros(net_dict[key].shape)
        for key, weight in weight_dict.items():
            # Drop the first 7 characters; the lookup above shows checkpoint
            # keys are expected to carry a "module." prefix.
            bare_key = key[7:]
            if bare_key not in net_dict:
                print("Key: %s does not exist in net_dict" % (bare_key))
            elif net_dict[bare_key].shape == weight.shape:
                net_dict[bare_key] = weight
            else:
                print(
                    "Key: %s from disk has shape %s copy to the model with shape %s"
                    % (bare_key, str(weight.shape),
                       str(net_dict[bare_key].shape)))
                loading_fail_signal = True
        if loading_fail_signal:
            raise RuntimeError(
                'Shape Error happens, remove "%s" from your -mpl settings.' %
                (prefix))
        net.load_state_dict(net_dict)
        net.eval()
        nets.append(net)
        print("Above model loaded with out a problem")
    detector = model.Detect(num_classes=2, bkg_label=0,
                            top_k=opt.detector_top_k,
                            conf_thresh=opt.detector_conf_threshold,
                            nms_thresh=opt.detector_nms_threshold)

    # Enumerate test folder
    root_path = os.path.expanduser(opt.test_dataset_root)
    if not os.path.exists(root_path):
        raise FileNotFoundError(
            "%s does not exists, please check your -tdr/--test_dataset_root settings"
            % (root_path))
    img_list = glob.glob(os.path.join(root_path, "*.%s" % (opt.extension)))

    # The output folder does not depend on the image, so create it once.
    img_save_directory = os.path.join(args.path, args.code_name,
                                      "val+" + run_tag)
    os.makedirs(img_save_directory, exist_ok=True)

    precisions, recalls = [], []
    for i, img_file in enumerate(sorted(img_list)):
        start = time.time()
        # Strip directory and extension whatever the extension length is
        # (a fixed "[:-4]" slice only works for three-letter extensions).
        stem = os.path.splitext(os.path.basename(img_file))[0]
        img = cv2.imread(img_file)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            print("Skip %s: cv2.imread could not read it" % (img_file))
            continue
        height_ori, width_ori = img.shape[0], img.shape[1]

        # Rotation estimation was hard-wired off (do_rotation = False) in the
        # previous revision, so the image is used as loaded.
        image = img

        # Resize the longer side to a certain length
        if height_ori >= width_ori:
            resize_spec = {"height": square, "width": "keep-aspect-ratio"}
        else:
            resize_spec = {"height": "keep-aspect-ratio", "width": square}
        resize_aug = augmenters.Sequential(
            [augmenters.Resize(size=resize_spec)])
        resize_aug = resize_aug.to_deterministic()
        image = resize_aug.augment_image(image)
        h_re, w_re = image.shape[0], image.shape[1]

        # Pad the image into a square image
        pad_aug = augmenters.Sequential(
            augmenters.PadToFixedSize(width=square, height=square,
                                      pad_cval=255, position="center"))
        pad_aug = pad_aug.to_deterministic()
        image = pad_aug.augment_image(image)
        h_final, w_final = image.shape[0], image.shape[1]

        # Prepare image tensor and test
        image_t = torch.Tensor(util.normalize_image(args, image)).unsqueeze(0)
        image_t = image_t.permute(0, 3, 1, 2)

        text_boxes = []
        # Pure inference: no autograd graph is needed.
        with torch.no_grad():
            for net_idx, net in enumerate(nets):
                device_id = opt.device_id if len(nets) == 1 else net_idx
                image_t = image_t.to("cuda:%d" % (device_id))
                out = net(image_t, is_train=False)
                loc_data, conf_data, prior_data = out
                prior_data = prior_data.to("cuda:%d" % (device_id))
                det_result = detector(loc_data, conf_data, prior_data)
                # Extract the predicted bboxes
                idx = det_result.data[0, 1, :, 0] >= 0.1
                text_boxes.append(det_result.data[0, 1, idx, 1:])
        # With several nets the results may sit on different GPUs, and
        # torch.cat needs a single device; gather them on the first one.
        gather_device = text_boxes[0].device
        text_boxes = torch.cat(
            [boxes.to(gather_device) for boxes in text_boxes], dim=0)
        text_boxes = combine_boxes(text_boxes, img=image_t)
        pred = [[float(coor) for coor in area] for area in text_boxes]

        # Box coordinates are scaled by the padded image size before being
        # mapped back to the original image.
        BBox = [
            imgaug.augmentables.bbs.BoundingBox(box[0] * w_final,
                                                box[1] * h_final,
                                                box[2] * w_final,
                                                box[3] * h_final)
            for box in pred
        ]
        BBoxes = imgaug.augmentables.bbs.BoundingBoxesOnImage(
            BBox, shape=image.shape)
        return_aug = augment_back(height_ori, width_ori,
                                  (h_final - h_re) / 2, (w_final - w_re) / 2)
        return_aug = return_aug.to_deterministic()
        # NOTE(review): the restored image is not used afterwards; the call is
        # kept so the augmenter is exercised exactly as before.
        img_ori = return_aug.augment_image(image)
        bbox = return_aug.augment_bounding_boxes([BBoxes])[0]

        # Ground truth: reduce every parsed coordinate list to its
        # axis-aligned bounding box and draw it.
        gt_box_file = os.path.join(opt.test_dataset_root,
                                   stem + "." + opt.ground_truth_extension)
        coords = tb_data.parse_file(os.path.expanduser(gt_box_file))
        gt_coords = []
        for coord in coords:
            x1, x2 = min(coord[::2]), max(coord[::2])
            y1, y2 = min(coord[1::2]), max(coord[1::2])
            gt_coords.append([x1, y1, x2, y2])
            cv2.rectangle(img, (x1, y1), (x2, y2), (70, 67, 238), 2)

        pred_final = []
        # The context manager closes the file even if drawing fails.
        with open(os.path.join(result_dir, stem + ".txt"), "w") as f:
            for box in bbox.bounding_boxes:
                x1, y1 = int(round(box.x1)), int(round(box.y1))
                x2, y2 = int(round(box.x2)), int(round(box.y2))
                pred_final.append([x1, y1, x2, y2])
                # 4-point to 8-point: x1, y1, x2, y1, x2, y2, x1, y2
                f.write("%d,%d,%d,%d,%d,%d,%d,%d\n" %
                        (x1, y1, x2, y1, x2, y2, x1, y2))
                cv2.rectangle(img, (x1, y1), (x2, y2), (255, 105, 65), 2)

        accu, precision, recall = measure(torch.Tensor(pred_final).cuda(),
                                          torch.Tensor(gt_coords).cuda(),
                                          width=img.shape[1],
                                          height=img.shape[0])
        precisions.append(precision)
        recalls.append(recall)

        preview_name = "%s_%.2f_%.2f" % (str(i).zfill(4), precision, recall)
        cv2.imwrite(os.path.join(img_save_directory, preview_name + ".jpg"),
                    img)
        if opt.verbose:
            print(
                "%d th image cost %.2f seconds, precision: %.2f, recall: %.2f"
                % (i, time.time() - start, precision, recall))

    if precisions:
        print("Precision: %.2f, Recall: %.2f" %
              (avg(precisions), avg(recalls)))
    else:
        # Nothing was scored, so there is no average to report.
        print("No image was evaluated under %s" % (root_path))
def to_tensor(args, image, seed=None, size=None):
    """Normalize ``image`` and convert it with ``T.ToTensor``.

    The image is first passed through ``util.normalize_image(args, image)``,
    cast to ``float32`` and then handed to a ``T.Compose`` pipeline that
    contains a single ``T.ToTensor`` step.

    ``seed`` and ``size`` are accepted but not used by this function.
    """
    normalized = util.normalize_image(args, image)
    pipeline = T.Compose([T.ToTensor()])
    as_float32 = normalized.astype("float32")
    return pipeline(as_float32)
def estimate_angle_and_crop_area(signal, args, path, seed, size, device=None):
    """
    Pre-process function for SROIE.

    Estimates the rotation of the input image and the margins around its
    content. The image itself is returned unchanged; the findings are
    reported in a dict:

    * ``"rotation"``: ``angle * 90``, set only when ``detect_angle`` returns
      a value whose magnitude times 90 exceeds 1.
    * ``"crop"``: ``(top, right, bottom, left)`` distances, set only when at
      least one of them is non-zero.

    ``device`` is where the 1-D convolutions run; it defaults to ``"cpu"``.
    """

    def _rescale(values):
        # Min-max rescaling of a tensor.
        lowest = torch.min(values)
        return (values - lowest) / (torch.max(values) - lowest)

    def _as_kernel(weights):
        # Shape a 5-tap tuple into the (1, 1, 5) weight that F.conv1d expects.
        return torch.tensor(weights).unsqueeze(0).unsqueeze(0).to(device)

    def _inverted_profile(grey_image, axis):
        # Mean along `axis`, shaped (1, 1, length), rescaled and inverted.
        mean = torch.sum(grey_image, dim=axis) / grey_image.size(axis)
        mean = mean.unsqueeze(0).unsqueeze(0).to(device)
        return 1 - _rescale(mean)

    untouched_input = signal
    transform_det = {}
    threshold = 0.15
    if device is None:
        device = "cpu"

    # Use CLAHE to enhance the contrast
    enhanced, _ = clahe_inv(signal, args, path, seed, size)
    original_size = enhanced.shape

    angle = detect_angle(enhanced)
    must_rotate = angle is not None and abs(angle) * 90 > 1
    if must_rotate:
        enhanced = rotate_image(enhanced, angle)
        # After rotation, the image size will change
        original_size = enhanced.shape

    if len(enhanced.shape) == 2:
        pixels = np.expand_dims(enhanced, -1).astype(np.float32)
    else:
        pixels = enhanced.astype(np.float32)
    pixels = util.normalize_image(args, pixels)
    # (H, W, C) nd.array -> (C, H, W) tensor, then sum the channels
    grey = torch.sum(torch.Tensor(pixels).float().permute(2, 0, 1), 0)

    # One profile along the width and one along the height
    profile_x = _inverted_profile(grey, 0)
    profile_y = _inverted_profile(grey, 1)

    gaussian = _as_kernel((0.1, 0.2, 0.4, 0.2, 0.1))
    detector1 = _as_kernel((0.0, 0.25, 0.5, 0.75, 1.0))
    detector2 = _as_kernel((1.0, 0.75, 0.5, 0.25, 0.0))

    start, end = [], []
    for profile in (profile_x, profile_y):
        length = profile.size(-1)
        # No zero-padding: the profile is long compared with the kernels
        smoothed = F.conv1d(profile, gaussian, stride=1, padding=0)
        ascending = F.conv1d(smoothed, detector1, stride=1,
                             padding=0).squeeze()
        descending = F.conv1d(smoothed, detector2, stride=1,
                              padding=0).squeeze()
        # Keep a safety margin of 5% of the profile length
        margin = int(0.05 * length)

        rise_at = (ascending >= threshold).nonzero().squeeze(1)
        if rise_at.nelement() != 0:
            lower = max(0, int(rise_at[0]) - margin)
        else:
            # No ascending response reaches the threshold
            lower = 0

        fall_at = (descending >= threshold).nonzero().squeeze(1)
        if fall_at.nelement() != 0:
            upper = min(length, int(fall_at[-1]) + margin)
        else:
            upper = length

        if upper > lower + 300:
            start.append(lower)
            end.append(upper)
            continue
        # Span of 300 or less: fall back to the full profile
        print("assume some error happens in smart crop")
        start.append(0)
        end.append(length)

    if must_rotate:
        print("angle: %s" % (angle))
        transform_det["rotation"] = angle * 90

    # 4 dimension means distance to top, right, bottom, left
    crop_area = (
        start[1],
        int(original_size[1] - end[0]),
        int(original_size[0] - end[1]),
        int(start[0]),
    )
    if crop_area != (0, 0, 0, 0):
        transform_det["crop"] = crop_area
    return untouched_input, transform_det