def __init__(self, set_list, rescale_range=None, transform=None, random_subset_size=None):
    """Build the index of annotation/image pairs that can be served.

    A pair is kept only when its JSON file loads (``json_state`` does not
    return ``None``) and every element obtained by iterating the loaded
    object contains ``'corners'``.

    Args:
        set_list: List of ``(json_path, img_path)`` pairs; sorted in place.
        rescale_range: Stored as ``self.rescale_range``.
        transform: Stored as ``self.transform``.
        random_subset_size: If not ``None``, the kept pairs are reduced to
            a random sample of at most this many entries.
    """
    self.rescale_range = rescale_range

    self.ids = set_list
    self.ids.sort()

    kept = []
    for json_path, img_path in self.ids:
        loaded = safe_load.json_state(json_path)
        if loaded is None:
            # Nothing could be loaded for this pair: leave it out.
            continue
        if all('corners' in entry for entry in loaded):
            kept.append([json_path, img_path])
    self.ids = kept

    if random_subset_size is not None:
        sample_size = min(random_subset_size, len(self.ids))
        self.ids = random.sample(self.ids, sample_size)

    print("SOL Ids Count:", len(self.ids))
    self.transform = transform
def __getitem__(self, idx):
    """Load one sample: a randomly rescaled image and its corner targets.

    The image is resized so that its second axis matches a size drawn
    uniformly from ``self.rescale_range``; the first axis is scaled by the
    same ratio. Each entry of ``gt_json['corners']`` becomes one row
    ``(x, y, x, y)`` multiplied by that scale factor.

    Returns:
        ``None`` if the annotation file could not be loaded; otherwise a
        dict with ``"img"`` (float tensor, axes reversed, with a leading
        axis of size 1, values mapped through ``v / 128.0 - 1.0``) and
        ``"sol_gt"`` (tensor, or ``None`` when its second axis is empty).
    """
    json_path, image_path = self.ids[idx]
    annotation = safe_load.json_state(json_path)
    if annotation is None:
        return None

    image = cv2.imread(image_path, cv2.IMREAD_COLOR)

    low, high = self.rescale_range[0], self.rescale_range[1]
    new_dim1 = int(np.random.uniform(low, high))
    scale = new_dim1 / float(image.shape[1])
    new_dim0 = int(image.shape[0] / float(image.shape[1]) * new_dim1)
    image = cv2.resize(image, (new_dim1, new_dim0),
                       interpolation=cv2.INTER_CUBIC)

    corners = annotation['corners']
    targets = np.zeros((1, len(corners), 4), dtype=np.float32)
    for col, corner in enumerate(corners):
        # Both points of a row are the same corner, so the row is
        # (x, y, x, y) in rescaled coordinates.
        scaled_x = corner[0] * scale
        scaled_y = corner[1] * scale
        targets[:, col, 0] = scaled_x
        targets[:, col, 1] = scaled_y
        targets[:, col, 2] = scaled_x
        targets[:, col, 3] = scaled_y

    if self.transform is not None:
        transformed = self.transform({"img": image, "sol_gt": targets})
        image = transformed['img']
        targets = transformed['sol_gt']

        # Photometric augmentation is applied only together with a transform.
        image = augmentation.apply_random_color_rotation(image)
        image = augmentation.apply_tensmeyer_brightness(image)
        image = augmentation.apply_random_blur(image)

    tensor = image.transpose([2, 1, 0])[None, ...].astype(np.float32)
    tensor = torch.from_numpy(tensor) / 128.0 - 1.0

    targets = None if targets.shape[1] == 0 else torch.from_numpy(targets)

    return {"img": tensor, "sol_gt": targets}
def __init__(self, set_list, char_to_idx, augmentation=False, img_height=32, random_subset_size=None):
    """Index every annotated line that has an ``'hw_path'`` entry.

    Args:
        set_list: List of ``(json_path, img_path)`` pairs; sorted in place.
        char_to_idx: Stored as ``self.char_to_idx``.
        augmentation: Stored as ``self.augmentation``.
        img_height: Stored as ``self.img_height``.
        random_subset_size: If not ``None``, the line index is reduced to
            a random sample of at most this many entries.

    ``self.detailed_ids`` ends up holding ``(file_index, line_index)``
    tuples, where ``file_index`` points into ``self.ids``.
    """
    self.img_height = img_height
    self.char_to_idx = char_to_idx
    self.augmentation = augmentation
    self.warning = False

    self.ids = set_list
    self.ids.sort()

    self.detailed_ids = []
    for file_index, (json_path, img_path) in enumerate(self.ids):
        lines = safe_load.json_state(json_path)
        if lines is None:
            continue
        for line_index, line in enumerate(lines):
            if 'hw_path' in line:
                self.detailed_ids.append((file_index, line_index))

    if random_subset_size is not None:
        sample_size = min(random_subset_size, len(self.detailed_ids))
        self.detailed_ids = random.sample(self.detailed_ids, sample_size)

    print(len(self.detailed_ids))
def __getitem__(self, idx):
    """Load one sample: a randomly rescaled image and its ``'sol'`` targets.

    The image is resized so that its second axis matches a size drawn
    uniformly from ``self.rescale_range``; the first axis is scaled by the
    same ratio. Every annotation entry gets one row ``(x0, y0, x1, y1)``
    in rescaled coordinates; entries without ``'sol'`` keep an all-zero
    row.

    Returns:
        ``None`` if the annotation file could not be loaded; otherwise a
        dict with ``"img"`` (float tensor, axes reversed, with a leading
        axis of size 1, values mapped through ``v / 128.0 - 1.0``) and
        ``"sol_gt"`` (tensor, or ``None`` when its second axis is empty).
    """
    json_path, image_path = self.ids[idx]
    entries = safe_load.json_state(json_path)
    if entries is None:
        return None

    image = cv2.imread(image_path)

    low, high = self.rescale_range[0], self.rescale_range[1]
    new_dim1 = int(np.random.uniform(low, high))
    scale = new_dim1 / float(image.shape[1])
    new_dim0 = int(image.shape[0] / float(image.shape[1]) * new_dim1)
    image = cv2.resize(image, (new_dim1, new_dim0),
                       interpolation=cv2.INTER_CUBIC)

    targets = np.zeros((1, len(entries), 4), dtype=np.float32)
    for col, entry in enumerate(entries):
        if 'sol' not in entry:
            continue
        sol = entry['sol']
        x0, x1, y0, y1 = sol['x0'], sol['x1'], sol['y0'], sol['y1']
        # Row layout is (x0, y0, x1, y1), all multiplied by the scale.
        for slot, value in enumerate((x0, y0, x1, y1)):
            targets[:, col, slot] = value * scale

    if self.transform is not None:
        transformed = self.transform({"img": image, "sol_gt": targets})
        image = transformed['img']
        targets = transformed['sol_gt']

        # Photometric augmentation is applied only together with a transform.
        image = augmentation.apply_random_color_rotation(image)
        image = augmentation.apply_tensmeyer_brightness(image)

    tensor = image.transpose([2, 1, 0])[None, ...].astype(np.float32)
    tensor = torch.from_numpy(tensor) / 128.0 - 1.0

    targets = None if targets.shape[1] == 0 else torch.from_numpy(targets)

    return {"img": tensor, "sol_gt": targets}
def __getitem__(self, idx):
    """Return the page image and the ``'lf'`` steps of one indexed line.

    ``self.detailed_ids[idx]`` selects a file (index into ``self.ids``)
    and a line inside that file's annotation list.

    For every step of the line's ``'lf'`` list two representations are
    produced:

    * ``lf_xyxy``: a 2x2 tensor ``[[x1, x0], [y1, y0]]``;
    * ``lf_xyrs``: a 5-element tensor ``[mx, my, theta, d / 2, 1.0]`` where
      ``(mx, my)`` is the midpoint of the two points, ``d`` their distance
      and ``theta = -atan2(x0 - x1, -(y0 - y1))``.

    Returns:
        ``None`` when the annotation file could not be loaded or the line
        has no ``'lf'`` entry; otherwise a dict with keys ``"img"``
        (float tensor with all axes reversed, values mapped through
        ``v / 128.0 - 1.0``), ``"lf_xyrs"``, ``"lf_xyxy"`` and ``"gt"``
        (the line's ``'gt'`` value).
    """
    ids_idx, line_idx = self.detailed_ids[idx]
    gt_json_path, img_path = self.ids[ids_idx]

    gt_json = safe_load.json_state(gt_json_path)
    # json_state may return None; report the sample as unavailable (like
    # the missing-'lf' case below) instead of raising a TypeError on the
    # subscript.
    if gt_json is None:
        return None

    line = gt_json[line_idx]
    if 'lf' not in line:
        return None

    positions = []
    positions_xy = []
    for step in line['lf']:
        x0 = step['x0']
        x1 = step['x1']
        y0 = step['y0']
        y1 = step['y1']

        positions_xy.append(torch.Tensor([[x1, x0], [y1, y0]]))

        dx = x0 - x1
        dy = y0 - y1
        d = math.sqrt(dx**2 + dy**2)

        mx = (x0 + x1) / 2.0
        my = (y0 + y1) / 2.0

        # NOTE: the original author marked this angle convention as
        # unverified ("Not sure if this is right...").
        theta = -math.atan2(dx, -dy)

        positions.append(torch.Tensor([mx, my, theta, d / 2, 1.0]))

    img = cv2.imread(img_path)
    if self.augmentation:
        img = augmentation.apply_random_color_rotation(img)
        img = augmentation.apply_tensmeyer_brightness(img)

    img = img.astype(np.float32)
    # transpose() without arguments reverses every axis.
    img = img.transpose()
    img = img / 128.0 - 1.0
    img = torch.from_numpy(img)

    return {
        "img": img,
        "lf_xyrs": positions,
        "lf_xyxy": positions_xy,
        "gt": line['gt'],
    }
def __getitem__(self, idx):
    """Return the cropped line image and its transcription for one line.

    ``self.detailed_ids[idx]`` selects a file (index into ``self.ids``)
    and a line inside that file's annotation list. The line image is read
    from the directory of the JSON file, using only the last
    ``/``-separated component of the line's ``'hw_path'``.

    If the image's first axis differs from ``self.img_height`` the image
    is resized uniformly so that it matches; a one-time warning is printed
    the first time this requires upsampling.

    Returns:
        ``None`` when the annotation file cannot be loaded, the line has
        no ``'hw_path'``, the image cannot be read, or the transcription
        is empty; otherwise a dict with ``"line_img"`` (float32 array,
        values mapped through ``v / 128.0 - 1.0``), ``"gt"`` (the
        transcription) and ``"gt_label"`` (the result of
        ``string_utils.str2label_single``).
    """
    ids_idx, line_idx = self.detailed_ids[idx]
    gt_json_path, _ = self.ids[ids_idx]

    gt_json = safe_load.json_state(gt_json_path)
    if gt_json is None:
        return None

    line = gt_json[line_idx]
    if 'hw_path' not in line:
        return None

    # Keep only the file name; the image is looked up next to the JSON.
    hw_name = line['hw_path'].split("/")[-1]
    hw_folder = os.path.dirname(gt_json_path)

    img = cv2.imread(os.path.join(hw_folder, hw_name))
    if img is None:
        return None

    if img.shape[0] != self.img_height:
        if img.shape[0] < self.img_height and not self.warning:
            self.warning = True
            # Call form: valid on Python 3 and prints the same text on
            # Python 2 (the former print statement was a SyntaxError on 3).
            print("WARNING: upsampling image to fit size")
        percent = float(self.img_height) / img.shape[0]
        img = cv2.resize(img, (0, 0), fx=percent, fy=percent,
                         interpolation=cv2.INTER_CUBIC)

    if img is None:
        return None

    if self.augmentation:
        img = augmentation.apply_random_color_rotation(img)
        img = augmentation.apply_tensmeyer_brightness(img)
        img = grid_distortion.warp_image(img)

    img = img.astype(np.float32)
    img = img / 128.0 - 1.0

    gt = line['gt']
    if len(gt) == 0:
        return None
    gt_label = string_utils.str2label_single(gt, self.char_to_idx)

    return {
        "line_img": img,
        "gt": gt,
        "gt_label": gt_label,
    }
def __getitem__(self, idx):
    """Return one page at full size and resized to ``self.resize_width``.

    Unless ``self.ignore_json`` is set, the annotation file is loaded and
    a failed load makes the method return ``None``. With
    ``self.ignore_json`` set, an empty annotation list is used instead.

    Returns:
        A dict with:
          * ``"resized_img"`` / ``"full_img"``: float tensors with axes
            reversed and a leading axis of size 1, values mapped through
            ``v / 128 - 1``;
          * ``"resize_scale"``: reciprocal of the applied scale factor;
          * ``"gt_lines"``: the ``'gt'`` value of every annotation entry;
          * ``"img_key"``: the JSON path minus its last ``len('.json')``
            characters;
          * ``"json_path"`` and ``"gt_json"``: the path and loaded content.
    """
    json_path, image_path = self.ids[idx]

    annotations = []
    if not self.ignore_json:
        annotations = safe_load.json_state(json_path)
        if annotations is None:
            return None

    def as_network_input(array):
        # float32 -> reversed axes -> leading axis -> tensor -> v/128 - 1
        array = array.astype(np.float32)
        array = array.transpose([2, 1, 0])[None, ...]
        return torch.from_numpy(array) / 128 - 1

    page = cv2.imread(image_path)
    full_tensor = as_network_input(page)

    new_dim1 = self.resize_width
    scale = new_dim1 / float(page.shape[1])
    new_dim0 = int(page.shape[0] / float(page.shape[1]) * new_dim1)
    resized = cv2.resize(page, (new_dim1, new_dim0),
                         interpolation=cv2.INTER_CUBIC)
    resized_tensor = as_network_input(resized)

    suffix_length = len('.json')
    key = json_path[:-suffix_length]

    transcriptions = [entry['gt'] for entry in annotations]

    return {
        "resized_img": resized_tensor,
        "full_img": full_tensor,
        "resize_scale": 1.0 / scale,
        "gt_lines": transcriptions,
        "img_key": key,
        "json_path": json_path,
        "gt_json": annotations,
    }
def __init__(self, set_list, random_subset_size=None, augmentation=False):
    """Index every annotated line that has an ``'lf'`` entry.

    Args:
        set_list: List of ``(json_path, img_path)`` pairs; sorted in place.
        random_subset_size: If not ``None``, the line index is reduced to
            a random sample of at most this many entries.
        augmentation: Stored as ``self.augmentation``.

    ``self.detailed_ids`` ends up holding ``(file_index, line_index)``
    tuples, where ``file_index`` points into ``self.ids``. Files whose
    JSON cannot be loaded contribute no lines.
    """
    self.augmentation = augmentation

    self.ids = set_list
    self.ids.sort()

    self.detailed_ids = []
    for ids_idx, (json_path, _img_path) in enumerate(self.ids):
        lines = safe_load.json_state(json_path)
        if lines is None:
            continue
        self.detailed_ids.extend(
            (ids_idx, line_idx)
            for line_idx, line in enumerate(lines)
            if 'lf' in line
        )

    if random_subset_size is not None:
        # Cap by the population actually being sampled. Capping by
        # len(self.ids) made random.sample raise ValueError when there
        # were fewer usable lines than files, and under-sampled when
        # there were more.
        sample_size = min(len(self.detailed_ids), random_subset_size)
        self.detailed_ids = random.sample(self.detailed_ids, sample_size)

    print(len(self.detailed_ids))
def __getitem__(self, idx):
    """Load one sample with ``'sol'`` targets in two representations.

    The image is resized so that its second axis matches a size drawn
    uniformly from ``self.rescale_range``; the first axis is scaled by the
    same ratio. For every annotation entry that has ``'sol'`` the rescaled
    points are stored as:

    * a row ``(x0, y0, x1, y1)`` of the target array (entries without
      ``'sol'`` keep an all-zero row);
    * a one-element list holding the 2x2 tensor ``[[x1, x0], [y1, y0]]``;
    * a one-element list holding ``[mx, my, theta, d / 2, 1.0]`` where
      ``(mx, my)`` is the midpoint, ``d`` the distance between the points
      and ``theta = -atan2(x0 - x1, -(y0 - y1))``.

    Returns:
        ``None`` if the annotation file could not be loaded; otherwise a
        dict with keys ``"scale"``, ``"img_path"``, ``"img"``,
        ``"sol_gt"`` (``None`` when its second axis is empty),
        ``"lf_xyrs"`` and ``"lf_xyxy"``.
    """
    json_path, img_path = self.ids[idx]
    entries = safe_load.json_state(json_path)
    if entries is None:
        return None

    image = cv2.imread(img_path)

    low, high = self.rescale_range[0], self.rescale_range[1]
    new_dim1 = int(np.random.uniform(low, high))
    scale = new_dim1 / float(image.shape[1])
    new_dim0 = int(image.shape[0] / float(image.shape[1]) * new_dim1)
    image = cv2.resize(image, (new_dim1, new_dim0),
                       interpolation=cv2.INTER_CUBIC)

    targets = np.zeros((1, len(entries), 4), dtype=np.float32)
    xyrs_steps = []
    xyxy_steps = []

    for col, entry in enumerate(entries):
        if 'sol' not in entry:
            continue
        sol = entry['sol']
        x0, x1, y0, y1 = (sol[key] * scale for key in ('x0', 'x1', 'y0', 'y1'))

        xyxy_steps.append([torch.Tensor([[x1, x0], [y1, y0]])])

        delta_x = x0 - x1
        delta_y = y0 - y1
        length = math.sqrt(delta_x**2 + delta_y**2)
        mid_x = (x0 + x1) / 2.0
        mid_y = (y0 + y1) / 2.0

        # NOTE: the original author marked this angle convention as
        # unverified ("Not sure if this is right...").
        angle = -math.atan2(delta_x, -delta_y)

        xyrs_steps.append(
            [torch.Tensor([mid_x, mid_y, angle, length / 2, 1.0])])

        for slot, value in enumerate((x0, y0, x1, y1)):
            targets[:, col, slot] = value

    if self.transform is not None:
        transformed = self.transform({"img": image, "sol_gt": targets})
        image = transformed['img']
        targets = transformed['sol_gt']

        # Photometric augmentation is applied only together with a transform.
        image = augmentation.apply_random_color_rotation(image)
        image = augmentation.apply_tensmeyer_brightness(image)

    tensor = image.transpose([2, 1, 0])[None, ...].astype(np.float32)
    tensor = torch.from_numpy(tensor) / 128.0 - 1.0

    targets = None if targets.shape[1] == 0 else torch.from_numpy(targets)

    return {
        "scale": scale,
        "img_path": img_path,
        "img": tensor,
        "sol_gt": targets,
        "lf_xyrs": xyrs_steps,
        "lf_xyxy": xyxy_steps,
    }