def generate_next_targets(original_dim, output, image, base_target_path, image_name, annots, dataloader, no):
    """Create, optionally visualise, and save the targets for the next iteration.

    The network output is cropped to the un-padded region, turned into word /
    character / affinity boxes, post-processed against the provided
    annotations and finally dumped to ``<base_target_path>/<image_name>.json``.

    :param original_dim: array-like exposing ``.max()``; index 0 is used as the
        height and index 1 as the width of the original image
    :param output: network prediction; ``output[0]`` is treated as the
        character map and ``output[1]`` as the affinity map
        (presumably values in [0, 1] on a 768x768 canvas -- TODO confirm)
    :param image: normalised input image exposing ``.data.cpu().numpy()``
        (presumably a channel-first torch tensor -- TODO confirm)
    :param base_target_path: path prefix for the json file and the debug images
    :param image_name: file name of the image; only the part after the last
        '/' is kept when 'datapile' is in ``config.dataset_name``
    :param annots: mapping providing at least the 'bbox' and 'text' entries
    :param dataloader: object whose ``dataset`` exposes ``unknown`` and ``gt``
    :param no: not used by this function
    :return: the 'f_score' entry returned by ``calculate_fscore`` when
        comparing the predicted word boxes with the post-processed ones
    """

    if 'datapile' in config.dataset_name:
        image_name = image_name.split('/')[-1]

    # File name without its last extension, shared by every debug image.
    stem = '.'.join(image_name.split('.')[:-1])

    # Visualisation follows the config flag directly (the periodic,
    # frequency-based variant is disabled while debugging).
    visualize = config.visualize_generated

    # Geometry of the un-padded image inside the 768 x 768 canvas.
    scale = 768 / original_dim.max()
    before_pad_dim = [int(original_dim[0] * scale), int(original_dim[1] * scale)]
    height_pad = (768 - before_pad_dim[0]) // 2
    width_pad = (768 - before_pad_dim[1]) // 2
    rows = slice(height_pad, height_pad + before_pad_dim[0])
    cols = slice(width_pad, width_pad + before_pad_dim[1])

    # cv2 expects sizes as (width, height).
    full_size = (original_dim[1], original_dim[0])
    half_size = (original_dim[1] // 2, original_dim[0] // 2)

    def restored_image():
        # De-normalised input, cropped to the un-padded region and resized
        # back to the original resolution.
        picture = denormalize_mean_variance(image.data.cpu().numpy().transpose(1, 2, 0))
        return cv2.resize(picture[rows, cols], full_size)

    def debug_path(folder):
        # Location of a debug image inside the given sub-folder.
        return base_target_path + folder + stem + '.png'

    output = np.uint8(output * 255)
    character_bbox = cv2.resize(output[0, rows, cols], half_size) / 255
    affinity_bbox = cv2.resize(output[1, rows, cols], half_size) / 255

    # Generating word-bbox given character and affinity heatmap
    generated_targets = generate_word_bbox(
        character_bbox,
        affinity_bbox,
        character_threshold=config.threshold_character,
        affinity_threshold=config.threshold_affinity,
        word_threshold=config.threshold_word,
        character_threshold_upper=config.threshold_character_upper,
        affinity_threshold_upper=config.threshold_affinity_upper,
        scaling_character=config.scale_character,
        scaling_affinity=config.scale_affinity)

    # The heat maps were evaluated at half resolution, so every box is doubled.
    generated_targets['word_bbox'] = generated_targets['word_bbox'] * 2
    generated_targets['characters'] = [box * 2 for box in generated_targets['characters']]
    generated_targets['affinity'] = [box * 2 for box in generated_targets['affinity']]

    if visualize:
        character_full = cv2.resize((character_bbox * 255).astype(np.uint8), full_size) / 255
        affinity_full = cv2.resize((affinity_bbox * 255).astype(np.uint8), full_size) / 255
        image_i = restored_image()

        # Saving affinity heat map
        plt.imsave(
            debug_path('_predicted/affinity/'),
            np.float32(affinity_full > config.threshold_affinity_upper),
            cmap='gray')

        # Saving character heat map
        plt.imsave(
            debug_path('_predicted/character/'),
            np.float32(character_full > config.threshold_character_upper),
            cmap='gray')

        # Saving word bbox drawn on the original image
        cv2.drawContours(image_i, generated_targets['word_bbox'], -1, (0, 255, 0), 2)
        plt.imsave(debug_path('_predicted/word_bbox/'), image_i)

    predicted_word_bbox = generated_targets['word_bbox'].copy()

    # --------------- PostProcessing for creating the targets for the next iteration ---------------- #
    reference = {'bbox': annots['bbox'], 'text': annots['text']}
    generated_targets = get_weighted_character_target(
        generated_targets,
        reference,
        dataloader.dataset.unknown,
        config.threshold_fscore,
        config.weight_threshold)

    target_word_bbox = generated_targets['word_bbox'].copy()

    score_report = calculate_fscore(
        predicted_word_bbox[:, :, 0, :],
        target_word_bbox[:, :, 0, :],
        text_target=annots['text'],
        unknown=dataloader.dataset.gt['unknown'])
    f_score = score_report['f_score']

    if visualize:
        image_i = restored_image()
        canvas_shape = (image_i.shape[0], image_i.shape[1])

        # Word bbox after postprocessing, drawn and saved
        cv2.drawContours(image_i, generated_targets['word_bbox'], -1, (0, 255, 0), 2)
        plt.imsave(debug_path('_next_target/word_bbox/'), image_i)

        # Generate affinity heatmap after postprocessing
        affinity_target, affinity_weight_map = generate_target_others(
            canvas_shape,
            generated_targets['affinity'].copy(),
            np.array(generated_targets['weights'])[:, 1])

        # Generate character heatmap after postprocessing
        character_target, characters_weight_map = generate_target_others(
            canvas_shape,
            generated_targets['characters'].copy(),
            np.array(generated_targets['weights'])[:, 0])

        # Saving the heat maps followed by their weight maps
        plt.imsave(debug_path('_next_target/affinity/'), affinity_target, cmap='gray')
        plt.imsave(debug_path('_next_target/character/'), character_target, cmap='gray')
        plt.imsave(debug_path('_next_target/affinity_weight/'), affinity_weight_map, cmap='gray')
        plt.imsave(debug_path('_next_target/character_weight/'), characters_weight_map, cmap='gray')

    # Saving the target for next iteration in json format
    generated_targets['word_bbox'] = generated_targets['word_bbox'].tolist()
    generated_targets['characters'] = [word_i.tolist() for word_i in generated_targets['characters']]
    generated_targets['affinity'] = [word_i.tolist() for word_i in generated_targets['affinity']]

    with open(base_target_path + '/' + image_name + '.json', 'w') as f:
        json.dump(generated_targets, f)

    return f_score
def __getitem__(self, item_i):
    """Return one randomly drawn training sample.

    ``item_i`` is ignored: the sample index is drawn at random on every call.
    When the dataset is in 'train' mode, a uniform draw below
    ``config.prob_synth`` selects a Synth-Text image; otherwise an image from
    ``self.gt`` is used.

    :param item_i: index requested by the caller (unused)
    :return: tuple of (image, character heatmap, affinity heatmap, character
        weak-supervision map, affinity weak-supervision map, dataset name,
        text target, drawn index, np.array([height, width]) of the image as
        read from disk); the five array entries are cast to float32
    """

    def to_three_channels(picture):
        # Grey-scale images are repeated to 3 channels; anything wider is cut
        # down to its first three channels.
        if len(picture.shape) == 2:
            return np.repeat(picture[:, :, None], repeats=3, axis=2)
        if picture.shape[2] == 1:
            return np.repeat(picture, repeats=3, axis=2)
        return picture[:, :, 0:3]

    # noinspection PyArgumentList
    np.random.seed()
    draw = np.random.uniform()

    # probability of picking a Synth-Text image vs Image from dataset
    pick_synth = draw < config.prob_synth and self.type_ == 'train'

    if pick_synth:
        random_item = np.random.randint(len(self.imnames))
        character = self.charBB[random_item].copy()

        # Read the image
        image = to_three_channels(
            plt.imread(self.base_path_synth + '/' + self.imnames[random_item][0]))
        height, width, channel = image.shape

        # Resize the image (to (768, 768) according to the original note)
        image, character = resize(image, character)
        image = normalize_mean_variance(image).transpose(2, 0, 1)

        # Generate character heatmap with weights
        weight_character, weak_supervision_char = generate_target(
            image.shape, character.copy(), weight=1)

        # Generate affinity heatmap with weights
        weight_affinity, weak_supervision_affinity = generate_affinity(
            image.shape, character.copy(), self.txt[random_item].copy(), weight=1)

        dataset_name = 'SYNTH'
        text_target = ''
    else:
        random_item = np.random.randint(len(self.gt))
        entry = self.gt[random_item]

        # Read the image
        image = to_three_channels(
            plt.imread(self.base_path_other_images + '/' + entry[0]))
        height, width, channel = image.shape

        annotation = entry[1]
        character = [
            np.array(word_i).reshape([len(word_i), 4, 1, 2])
            for word_i in annotation['characters'].copy()
        ]
        affinity = [
            np.array(word_i).reshape([len(word_i), 4, 1, 2])
            for word_i in annotation['affinity'].copy()
        ]

        assert len(character) == len(
            affinity), 'word length different in character and affinity'

        # Resize the image (to (768, 768) according to the original note)
        image, character, affinity = resize_generated(
            image, character.copy(), affinity.copy())
        image = normalize_mean_variance(image).transpose(2, 0, 1)

        weights = list(annotation['weights'].copy())
        text_target = '#@#@#@'.join(annotation['text'])

        assert len(annotation['text']) == len(annotation['word_bbox']), \
            'Length of word_bbox != Length of text'

        # Generate character heatmap with weights
        weight_character, weak_supervision_char = generate_target_others(
            image.shape, character.copy(), weights.copy())

        # Generate affinity heatmap with weights
        weight_affinity, weak_supervision_affinity = generate_target_others(
            image.shape, affinity.copy(), weights.copy())

        dataset_name = 'ICDAR'

    original_size = np.array([height, width])

    return (
        image.astype(np.float32),
        weight_character.astype(np.float32),
        weight_affinity.astype(np.float32),
        weak_supervision_char.astype(np.float32),
        weak_supervision_affinity.astype(np.float32),
        dataset_name,
        text_target,
        random_item,
        original_size,
    )
def __getitem__(self, item_i):
    """Return one randomly drawn training sample.

    ``item_i`` is ignored: the sample is drawn at random on every call. When
    the dataset is in 'train' mode, a uniform draw below ``config.prob_synth``
    selects a synthetic sample from ``self.raw_dataset['data']``; otherwise an
    image listed in ``self.gt`` is read from ``self.base_path_other_images``.

    :param item_i: index requested by the caller (unused)
    :return: tuple of (image, character heatmap, affinity heatmap, character
        weak-supervision map, affinity weak-supervision map, dataset name,
        text target, str(drawn item), np.array([height, width])); the five
        array entries are cast to float32. For synthetic samples the size
        entry stays at [0, 0] because it is only measured for images read
        from ``self.gt``.
    :raises AssertionError: if the stored annotation has a different number
        of words in 'characters' and 'affinity', or in 'text' and 'word_bbox'
    """
    # Only measured in the non-synthetic branch; synthetic samples report 0.
    height, width = 0, 0

    # noinspection PyArgumentList
    np.random.seed()
    check = np.random.uniform()

    # probability of picking a Synth-Text image vs Image from dataset
    if check < config.prob_synth and self.type_ == 'train':
        random_item = np.random.choice(self.imnames)
        sample = self.raw_dataset['data'][random_item]
        image = sample[()]
        charBB = sample.attrs['charBB']
        txt = [each.decode('utf-8') for each in sample.attrs['txt']]

        # Handle line-break: every entry contributes one item per text line
        # (splitting an entry without '\n' simply yields the entry itself).
        text_lines = [part for entry in txt for part in entry.split('\n')]

        # Remove blank words, then split every remaining word into characters.
        all_words = []
        for line in text_lines:
            words = [word for word in line.strip().split(' ') if word not in ['', ' ']]
            all_words.append([char for word in words for char in word])

        # Resize the image (to (768, 768) according to the original note)
        image, character = resize(image, charBB.copy())
        image = normalize_mean_variance(image).transpose(2, 0, 1)

        # Generate character heatmap with weights
        weight_character, weak_supervision_char = generate_target(
            image.shape, character.copy(), weight=1)

        # Generate affinity heatmap with weights
        weight_affinity, weak_supervision_affinity = generate_affinity(
            image.shape, character.copy(), all_words.copy(), weight=1)

        dataset_name = 'SYNTH'
        text_target = ''
    else:
        random_item = np.random.randint(len(self.gt))
        entry = self.gt[random_item]

        # Read the image
        image = plt.imread(os.path.join(self.base_path_other_images, entry[0]))

        # Force a 3-channel layout: repeat grey-scale data, drop extra channels.
        if len(image.shape) == 2:
            image = np.repeat(image[:, :, None], repeats=3, axis=2)
        elif image.shape[2] == 1:
            image = np.repeat(image, repeats=3, axis=2)
        else:
            image = image[:, :, 0:3]

        height, width, _ = image.shape

        annotation = entry[1]
        character = [
            np.array(word_i).reshape([len(word_i), 4, 1, 2])
            for word_i in annotation['characters'].copy()
        ]
        affinity = [
            np.array(word_i).reshape([len(word_i), 4, 1, 2])
            for word_i in annotation['affinity'].copy()
        ]

        assert len(character) == len(
            affinity), 'word length different in character and affinity'

        # Resize the image (to (768, 768) according to the original note)
        image, character, affinity = resize_generated(
            image, character.copy(), affinity.copy())
        image = normalize_mean_variance(image).transpose(2, 0, 1)

        weights = list(annotation['weights'].copy())
        text_target = '#@#@#@'.join(annotation['text'])

        assert len(annotation['text']) == len(annotation['word_bbox']), \
            'Length of word_bbox != Length of text'

        # One row per word; column 0 is used as the character weight and
        # column 1 as the affinity weight.
        # NOTE(review): the affinity map previously reused column 0 as well;
        # confirm the (character, affinity) column layout against the code
        # that writes the 'weights' entry.
        weight_table = np.array(weights)

        # Generate character heatmap with weights
        weight_character, weak_supervision_char = generate_target_others(
            image.shape, character.copy(), weight_table[:, 0])

        # Generate affinity heatmap with weights
        weight_affinity, weak_supervision_affinity = generate_target_others(
            image.shape, affinity.copy(), weight_table[:, 1])

        dataset_name = 'datapile'

    return (
        image.astype(np.float32),
        weight_character.astype(np.float32),
        weight_affinity.astype(np.float32),
        weak_supervision_char.astype(np.float32),
        weak_supervision_affinity.astype(np.float32),
        dataset_name,
        text_target,
        str(random_item),
        np.array([height, width]),
    )