def test(model):
    """
    Score the model's word boxes on the DataLoaderEvalICDAR2013('test') dataset.

    :param model: network to evaluate; put in eval mode and called on every image batch
    :return: mean over batches of the per-batch mean of the calculate_fscore results
    """

    loader = DataLoader(
        DataLoaderEvalICDAR2013('test'),
        batch_size=config.batch_size['train'],
        num_workers=8,
        shuffle=False)

    with torch.no_grad():
        model.eval()
        progress = tqdm(loader)
        batch_means = []
        ground_truth = loader.dataset.gt

        for image, image_name, original_dim, item in progress:

            # Ground-truth annotation of each image in the batch, looked up through its dataset index
            annots = [ground_truth['annots'][loader.dataset.imnames[idx]] for idx in item]

            if config.use_cuda:
                image = image.cuda()

            prediction = model(image)

            # A list of per-chunk outputs is stitched back into a single batch
            if type(prediction) is list:
                prediction = torch.cat(prediction, dim=0)

            prediction = prediction.data.cpu().numpy()
            dims = original_dim.cpu().numpy()

            # One score per image: predicted word boxes (resized by resize_bbox) against the annotated boxes
            scores = [
                calculate_fscore(
                    resize_bbox(dims[k], prediction[k], config)['word_bbox'][:, :, 0, :],
                    np.array(annots[k]['bbox']),
                    text_target=annots[k]['text'],
                )
                for k in range(prediction.shape[0])
            ]

            batch_means.append(np.mean(scores))
            progress.set_description('F-score: ' + str(np.mean(batch_means)))

        torch.cuda.empty_cache()

    return np.mean(batch_means)
def test(model, iteration):
    """
    Evaluate the model on DataLoaderEvalOther('test') and save one visualisation per image.

    For every image the padded 768x768 network input is cropped and resized back to the original
    size, the predicted word boxes are drawn with colour (0, 255, 0) and the annotated boxes with
    colour (0, 0, 255), and the result is written to ``config.save_path + '/Test_<iteration>/'``.
    True positives, false positives and the number of positives reported by calculate_fscore are
    accumulated over the whole run to obtain cumulative precision / recall / F-score.

    :param model: network to evaluate; put in eval mode and called on every image batch
    :param iteration: iteration number, used only to name the output directory
    :return: tuple (cumulative F-score, cumulative precision, cumulative recall); each value is
        0.0 when its denominator is zero or when the loader yields no batch
    """

    save_dir = config.save_path + '/Test_' + str(iteration)
    os.makedirs(save_dir, exist_ok=True)

    dataloader = DataLoader(
        DataLoaderEvalOther('test'),
        batch_size=config.batch_size['test'],
        num_workers=config.num_workers['test'],
        shuffle=False,
        worker_init_fn=_init_fn
    )

    true_positive = 0
    false_positive = 0
    num_positive = 0

    # Defined up-front so the return statement is valid even if the loader yields no batch
    precision = 0.0
    recall = 0.0
    cumulative_f_score = 0.0

    with torch.no_grad():

        model.eval()
        iterator = tqdm(dataloader)
        all_accuracy = []

        ground_truth = dataloader.dataset.gt

        for image, image_name, original_dim, item in iterator:

            # Ground-truth annotation of each image in the batch, looked up through its dataset index
            annots = [ground_truth['annots'][dataloader.dataset.imnames[i]] for i in item]

            if config.use_cuda:
                image = image.cuda()

            output = model(image)

            # A list of per-chunk outputs is stitched back into a single batch
            if isinstance(output, list):
                output = torch.cat(output, dim=0)

            output = output.data.cpu().numpy()

            # Clamp the predicted heat maps to [0, 1]
            output[output > 1] = 1
            output[output < 0] = 0

            original_dim = original_dim.cpu().numpy()

            f_score = []

            for i in range(output.shape[0]):

                # --------- Resizing it back to the original image size and saving it ----------- #

                dim = original_dim[i]
                cur_image = denormalize_mean_variance(image[i].data.cpu().numpy().transpose(1, 2, 0))

                # Undo the resize-then-centre-pad that produced the 768x768 network input
                max_dim = dim.max()
                resizing_factor = 768 / max_dim
                before_pad_dim = [int(dim[0] * resizing_factor), int(dim[1] * resizing_factor)]

                height_pad = (768 - before_pad_dim[0]) // 2
                width_pad = (768 - before_pad_dim[1]) // 2

                cur_image = cv2.resize(
                    cur_image[
                        height_pad:height_pad + before_pad_dim[0],
                        width_pad:width_pad + before_pad_dim[1]],
                    (dim[1], dim[0]))

                # Computed once and reused for both the drawing and the scoring below
                predicted_word_bbox = resize_bbox(dim, output[i], config)['word_bbox']
                target_word_bbox = np.array(annots[i]['bbox'])

                cv2.drawContours(cur_image, predicted_word_bbox, -1, (0, 255, 0), 2)
                cv2.drawContours(cur_image, target_word_bbox, -1, (0, 0, 255), 2)

                plt.imsave(save_dir + '/' + image_name[i], cur_image.astype(np.uint8))

                score_calc = calculate_fscore(
                    predicted_word_bbox[:, :, 0, :],
                    target_word_bbox,
                    text_target=annots[i]['text'],
                )

                f_score.append(score_calc['f_score'])
                true_positive += score_calc['true_positive']
                false_positive += score_calc['false_positive']
                num_positive += score_calc['num_positive']

            all_accuracy.append(np.mean(f_score))

            # Guard every ratio: early batches (or a poor model) can have no predictions,
            # no positives or no matches at all, which previously divided by zero.
            num_predicted = true_positive + false_positive
            precision = true_positive / num_predicted if num_predicted > 0 else 0.0
            recall = true_positive / num_positive if num_positive > 0 else 0.0
            pr_sum = precision + recall
            cumulative_f_score = 2 * precision * recall / pr_sum if pr_sum > 0 else 0.0

            iterator.set_description(
                'F-score: ' + str(np.mean(all_accuracy)) + '| Cumulative F-score: '
                + str(cumulative_f_score))

        torch.cuda.empty_cache()

    return cumulative_f_score, precision, recall
def generate_next_targets(original_dim, output, image, base_target_path, image_name, annots, dataloader, no):
    """
    Build, score and store the next-iteration training target for a single image.

    The network output is cropped out of its 768x768 padded frame, resized to half the original
    resolution, turned into word / character / affinity boxes by generate_word_bbox, scaled back
    by 2, post-processed with get_weighted_character_target and finally dumped as JSON to
    ``base_target_path + '/' + image_name + '.json'``. When config.visualize_generated is set,
    intermediate heat maps and box overlays are also written as PNG files under
    ``base_target_path + '_predicted/...'`` and ``base_target_path + '_next_target/...'``.

    :param original_dim: array read as (height, width) of the original image
    :param output: network output for this image; channel 0 is used as the character map and
        channel 1 as the affinity map
    :param image: normalised input image tensor, only used for the visualisations
    :param base_target_path: path prefix for every file written here
    :param image_name: file name of the image (reduced to its last '/' component when
        'datapile' is in config.dataset_name)
    :param annots: mapping providing the 'bbox' and 'text' annotations of the image
    :param dataloader: loader whose dataset supplies `unknown` and `gt['unknown']`
    :param no: not used by this function
    :return: 'f_score' entry of calculate_fscore comparing the raw predicted word boxes with the
        post-processed target word boxes
    """

    if 'datapile' in config.dataset_name:
        image_name = image_name.split('/')[-1]

    # Visualisation is currently driven by the config flag alone (kept for debugging)
    visualize = config.visualize_generated

    def _png(folder):
        # PNG path for this image inside `folder`, with the original extension stripped
        return base_target_path + folder + '.'.join(image_name.split('.')[:-1]) + '.png'

    def _restored_image():
        # De-normalised input image, cropped out of the padding and resized to the original size
        picture = denormalize_mean_variance(image.data.cpu().numpy().transpose(1, 2, 0))
        return cv2.resize(picture[rows, cols], full_size)

    # Geometry of the un-padded region inside the 768x768 network frame
    scale = 768 / original_dim.max()
    scaled_h = int(original_dim[0] * scale)
    scaled_w = int(original_dim[1] * scale)

    output = np.uint8(output * 255)

    pad_top = (768 - scaled_h) // 2
    pad_left = (768 - scaled_w) // 2
    rows = slice(pad_top, pad_top + scaled_h)
    cols = slice(pad_left, pad_left + scaled_w)

    full_size = (original_dim[1], original_dim[0])
    half_size = (original_dim[1] // 2, original_dim[0] // 2)

    character_bbox = cv2.resize(output[0, rows, cols], half_size) / 255
    affinity_bbox = cv2.resize(output[1, rows, cols], half_size) / 255

    # Word boxes from the character and affinity heat maps (computed at half resolution)
    generated_targets = generate_word_bbox(
        character_bbox, affinity_bbox,
        character_threshold=config.threshold_character,
        affinity_threshold=config.threshold_affinity,
        word_threshold=config.threshold_word,
        character_threshold_upper=config.threshold_character_upper,
        affinity_threshold_upper=config.threshold_affinity_upper,
        scaling_character=config.scale_character,
        scaling_affinity=config.scale_affinity)

    # Bring every box back to full resolution
    generated_targets['word_bbox'] = generated_targets['word_bbox'] * 2
    generated_targets['characters'] = [boxes * 2 for boxes in generated_targets['characters']]
    generated_targets['affinity'] = [boxes * 2 for boxes in generated_targets['affinity']]

    if visualize:

        character_bbox = cv2.resize((character_bbox * 255).astype(np.uint8), full_size) / 255
        affinity_bbox = cv2.resize((affinity_bbox * 255).astype(np.uint8), full_size) / 255

        image_i = _restored_image()

        # Thresholded affinity heat map
        plt.imsave(
            _png('_predicted/affinity/'),
            np.float32(affinity_bbox > config.threshold_affinity_upper),
            cmap='gray')

        # Thresholded character heat map
        plt.imsave(
            _png('_predicted/character/'),
            np.float32(character_bbox > config.threshold_character_upper),
            cmap='gray')

        # Predicted word boxes drawn on the original image
        cv2.drawContours(image_i, generated_targets['word_bbox'], -1, (0, 255, 0), 2)
        plt.imsave(_png('_predicted/word_bbox/'), image_i)

    predicted_word_bbox = generated_targets['word_bbox'].copy()

    # --------------- PostProcessing for creating the targets for the next iteration ---------------- #

    generated_targets = get_weighted_character_target(
        generated_targets,
        {'bbox': annots['bbox'], 'text': annots['text']},
        dataloader.dataset.unknown,
        config.threshold_fscore,
        config.weight_threshold)

    target_word_bbox = generated_targets['word_bbox'].copy()

    f_score = calculate_fscore(
        predicted_word_bbox[:, :, 0, :],
        target_word_bbox[:, :, 0, :],
        text_target=annots['text'],
        unknown=dataloader.dataset.gt['unknown'])['f_score']

    if visualize:

        image_i = _restored_image()

        # Word boxes after post-processing, drawn on a fresh copy of the image
        cv2.drawContours(image_i, generated_targets['word_bbox'], -1, (0, 255, 0), 2)
        plt.imsave(_png('_next_target/word_bbox/'), image_i)

        target_shape = (image_i.shape[0], image_i.shape[1])
        weights = generated_targets['weights']

        # Affinity heat map and weight map after post-processing (weight column 1)
        affinity_target, affinity_weight_map = generate_target_others(
            target_shape,
            generated_targets['affinity'].copy(),
            np.array(weights)[:, 1])

        # Character heat map and weight map after post-processing (weight column 0)
        character_target, characters_weight_map = generate_target_others(
            target_shape,
            generated_targets['characters'].copy(),
            np.array(weights)[:, 0])

        plt.imsave(_png('_next_target/affinity/'), affinity_target, cmap='gray')
        plt.imsave(_png('_next_target/character/'), character_target, cmap='gray')
        plt.imsave(_png('_next_target/affinity_weight/'), affinity_weight_map, cmap='gray')
        plt.imsave(_png('_next_target/character_weight/'), characters_weight_map, cmap='gray')

    # Convert the arrays to plain lists so the target can be stored as JSON
    generated_targets['word_bbox'] = generated_targets['word_bbox'].tolist()
    generated_targets['characters'] = [boxes.tolist() for boxes in generated_targets['characters']]
    generated_targets['affinity'] = [boxes.tolist() for boxes in generated_targets['affinity']]

    with open(base_target_path + '/' + image_name + '.json', 'w') as f:
        json.dump(generated_targets, f)

    return f_score
def test(model):
    """
    Score the model's word boxes on the DataLoaderEvalICDAR2013('test') dataset.

    The character and affinity heat maps are cropped out of the 768x768 padded frame, resized to
    the original image size and converted to word boxes with generate_word_bbox before scoring.

    :param model: network to evaluate; put in eval mode and called on every image batch
    :return: mean over batches of the per-batch mean of the calculate_fscore results
    """

    loader = DataLoader(
        DataLoaderEvalICDAR2013('test'),
        batch_size=config.batch_size['train'],
        num_workers=8,
        shuffle=False)

    with torch.no_grad():
        model.eval()
        progress = tqdm(loader)
        batch_means = []
        ground_truth = loader.dataset.gt

        for image, image_name, original_dim, item in progress:

            # Ground-truth annotation of each image in the batch, looked up through its dataset index
            annots = [ground_truth['annots'][loader.dataset.imnames[idx]] for idx in item]

            if config.use_cuda:
                image = image.cuda()

            heatmaps = model(image)

            # A list of per-chunk outputs is stitched back into a single batch
            if type(heatmaps) is list:
                heatmaps = torch.cat(heatmaps, dim=0)

            heatmaps = heatmaps.data.cpu().numpy()
            dims = original_dim.cpu().numpy()

            scores = []

            for k in range(heatmaps.shape[0]):

                # --------- Resizing it back to the original image size ----------- #

                dim = dims[k]
                scale = 768 / dim.max()
                scaled_h = int(dim[0] * scale)
                scaled_w = int(dim[1] * scale)

                # Quantise the heat maps to 0..255 steps (stored back into the same array)
                heatmaps[k, :, :, :] = np.uint8(heatmaps[k, :, :, :] * 255)

                pad_top = (768 - scaled_h) // 2
                pad_left = (768 - scaled_w) // 2
                rows = slice(pad_top, pad_top + scaled_h)
                cols = slice(pad_left, pad_left + scaled_w)
                full_size = (dim[1], dim[0])

                character_bbox = cv2.resize(heatmaps[k, 0, rows, cols], full_size) / 255
                affinity_bbox = cv2.resize(heatmaps[k, 1, rows, cols], full_size) / 255

                generated = generate_word_bbox(
                    character_bbox, affinity_bbox,
                    character_threshold=config.threshold_character,
                    affinity_threshold=config.threshold_affinity,
                    word_threshold=config.threshold_word)

                predicted = generated['word_bbox'].copy()

                scores.append(
                    calculate_fscore(predicted[:, :, 0, :], np.array(annots[k]['bbox'])))

            batch_means.append(np.mean(scores))
            progress.set_description('F-score: ' + str(np.mean(batch_means)))

        torch.cuda.empty_cache()

    return np.mean(batch_means)
def synthesize_with_score(dataloader, model, base_target_path, iteration):
    """
    Run the model over every image of the dataloader and store the targets for the next iteration.

    For each image the character and affinity heat maps are cropped out of the 768x768 padded
    frame, resized to the original size and converted to boxes with generate_word_bbox; the boxes
    are post-processed with get_weighted_character_target and written as JSON to
    ``base_target_path + '/<image name without extension>.json'``. When config.visualize_generated
    is set, heat maps and box overlays are also saved as PNG files under
    ``base_target_path + '_predicted/...'`` and ``base_target_path + '_next_target/...'``.
    The running mean F-score is shown in the progress bar only.

    :param dataloader: loader yielding (image, image_name, original_dim, item) batches; its dataset
        supplies `gt`, `imnames` and `unknown` (the original documentation describes it as the
        icdar 2013 dataset)
    :param model: pre-trained model; put in eval mode and called on every image batch
    :param base_target_path: path prefix where the predictions are stored
    :param iteration: index used to pick the entry of config.weight_threshold
    :return: None
    """

    def _png(folder, name):
        # PNG path for image `name` inside `folder`, with the original extension stripped
        return base_target_path + folder + '.'.join(name.split('.')[:-1]) + '.png'

    def _restored_image(tensor_image, rows, cols, size):
        # De-normalised input image, cropped out of the padding and resized to the original size
        picture = denormalize_mean_variance(tensor_image.data.cpu().numpy().transpose(1, 2, 0))
        return cv2.resize(picture[rows, cols], size)

    with torch.no_grad():

        model.eval()
        progress = tqdm(dataloader)
        mean_f_score = []

        for image, image_name, original_dim, item in progress:

            # Ground-truth annotation of each image in the batch, looked up through its dataset index
            annots = [dataloader.dataset.gt['annots'][dataloader.dataset.imnames[idx]] for idx in item]

            if config.use_cuda:
                image = image.cuda()

            heatmaps = model(image)

            # A list of per-chunk outputs is stitched back into a single batch
            if type(heatmaps) is list:
                heatmaps = torch.cat(heatmaps, dim=0)

            heatmaps = heatmaps.data.cpu().numpy()
            dims = original_dim.cpu().numpy()

            scores = []

            for k in range(heatmaps.shape[0]):

                # --------- Resizing it back to the original image size and saving it ----------- #

                dim = dims[k]
                scale = 768 / dim.max()
                scaled_h = int(dim[0] * scale)
                scaled_w = int(dim[1] * scale)

                # Quantise the heat maps to 0..255 steps (stored back into the same array)
                heatmaps[k, :, :, :] = np.uint8(heatmaps[k, :, :, :] * 255)

                pad_top = (768 - scaled_h) // 2
                pad_left = (768 - scaled_w) // 2
                rows = slice(pad_top, pad_top + scaled_h)
                cols = slice(pad_left, pad_left + scaled_w)
                full_size = (dim[1], dim[0])

                character_bbox = cv2.resize(heatmaps[k, 0, rows, cols], full_size) / 255
                affinity_bbox = cv2.resize(heatmaps[k, 1, rows, cols], full_size) / 255

                if config.visualize_generated:
                    image_i = _restored_image(image[k], rows, cols, full_size)

                # Word boxes from the character and affinity heat maps
                generated_targets = generate_word_bbox(
                    character_bbox, affinity_bbox,
                    character_threshold=config.threshold_character,
                    affinity_threshold=config.threshold_affinity,
                    word_threshold=config.threshold_word)

                if config.visualize_generated:

                    # Thresholded affinity heat map
                    plt.imsave(
                        _png('_predicted/affinity/', image_name[k]),
                        np.float32(affinity_bbox > config.threshold_affinity),
                        cmap='gray')

                    # Thresholded character heat map
                    plt.imsave(
                        _png('_predicted/character/', image_name[k]),
                        np.float32(character_bbox > config.threshold_character),
                        cmap='gray')

                    # Predicted word boxes drawn on the original image
                    cv2.drawContours(image_i, generated_targets['word_bbox'], -1, (0, 255, 0), 2)
                    plt.imsave(_png('_predicted/word_bbox/', image_name[k]), image_i)

                predicted_word_bbox = generated_targets['word_bbox'].copy()

                # --------------- PostProcessing for creating the targets for the next iteration ---------------- #

                generated_targets = get_weighted_character_target(
                    generated_targets,
                    {'bbox': annots[k]['bbox'], 'text': annots[k]['text']},
                    dataloader.dataset.unknown,
                    config.threshold_fscore,
                    config.weight_threshold[iteration])

                target_word_bbox = generated_targets['word_bbox'].copy()

                scores.append(
                    calculate_fscore(predicted_word_bbox[:, :, 0, :], target_word_bbox[:, :, 0, :]))

                if config.visualize_generated:

                    image_i = _restored_image(image[k], rows, cols, full_size)

                    # Word boxes after post-processing, drawn on a fresh copy of the image
                    cv2.drawContours(image_i, generated_targets['word_bbox'], -1, (0, 255, 0), 2)
                    plt.imsave(_png('_next_target/word_bbox/', image_name[k]), image_i)

                    target_shape = (image_i.shape[0], image_i.shape[1])

                    # Affinity heat map and weight map after post-processing
                    affinity_target, affinity_weight_map = generate_target_others(
                        target_shape,
                        generated_targets['affinity'].copy(),
                        generated_targets['weights'].copy())

                    # Character heat map and weight map after post-processing
                    character_target, characters_weight_map = generate_target_others(
                        target_shape,
                        generated_targets['characters'].copy(),
                        generated_targets['weights'].copy())

                    plt.imsave(
                        _png('_next_target/affinity/', image_name[k]), affinity_target, cmap='gray')
                    plt.imsave(
                        _png('_next_target/character/', image_name[k]), character_target, cmap='gray')
                    plt.imsave(
                        _png('_next_target/affinity_weight/', image_name[k]),
                        affinity_weight_map, cmap='gray')
                    plt.imsave(
                        _png('_next_target/character_weight/', image_name[k]),
                        characters_weight_map, cmap='gray')

                # Convert the arrays to plain lists so the target can be stored as JSON
                generated_targets['word_bbox'] = generated_targets['word_bbox'].tolist()
                generated_targets['characters'] = [
                    boxes.tolist() for boxes in generated_targets['characters']]
                generated_targets['affinity'] = [
                    boxes.tolist() for boxes in generated_targets['affinity']]

                json_path = base_target_path + '/' + '.'.join(image_name[k].split('.')[:-1]) + '.json'
                with open(json_path, 'w') as f:
                    json.dump(generated_targets, f)

            mean_f_score.append(np.mean(scores))
            progress.set_description('F-score: ' + str(np.mean(mean_f_score)))
def test(model):
    """
    Evaluate the model on the DataLoaderEvalOther('test') dataset.

    Per-image F-scores from calculate_fscore are averaged per batch, and the true positives,
    false positives and number of positives are accumulated over the whole run so that a
    cumulative F-score can be shown in the progress bar.

    :param model: network to evaluate; put in eval mode and called on every image batch
    :return: mean over batches of the per-batch mean F-score (the cumulative F-score is only
        displayed, not returned)
    """

    dataloader = DataLoader(
        DataLoaderEvalOther('test'),
        batch_size=config.batch_size['test'],
        num_workers=config.num_workers['test'],
        shuffle=False
    )

    true_positive = 0
    false_positive = 0
    num_positive = 0

    with torch.no_grad():

        model.eval()
        iterator = tqdm(dataloader)
        all_accuracy = []

        ground_truth = dataloader.dataset.gt

        for image, image_name, original_dim, item in iterator:

            # Ground-truth annotation of each image in the batch, looked up through its dataset index
            annots = [ground_truth['annots'][dataloader.dataset.imnames[i]] for i in item]

            if config.use_cuda:
                image = image.cuda()

            output = model(image)

            # A list of per-chunk outputs is stitched back into a single batch
            if isinstance(output, list):
                output = torch.cat(output, dim=0)

            output = output.data.cpu().numpy()
            original_dim = original_dim.cpu().numpy()

            f_score = []

            for i in range(output.shape[0]):

                # TODO - Visualise the test results
                # TODO - Why is F-score of testing always less than F-score of training at iteration 0?

                score_calc = calculate_fscore(
                    resize_bbox(original_dim[i], output[i], config)['word_bbox'][:, :, 0, :],
                    np.array(annots[i]['bbox']),
                    text_target=annots[i]['text'],
                )

                f_score.append(score_calc['f_score'])
                true_positive += score_calc['true_positive']
                false_positive += score_calc['false_positive']
                num_positive += score_calc['num_positive']

            all_accuracy.append(np.mean(f_score))

            # Guard every ratio: early batches (or a poor model) can have no predictions,
            # no positives or no matches at all, which previously divided by zero.
            num_predicted = true_positive + false_positive
            precision = true_positive / num_predicted if num_predicted > 0 else 0.0
            recall = true_positive / num_positive if num_positive > 0 else 0.0
            pr_sum = precision + recall
            cumulative_f_score = 2 * precision * recall / pr_sum if pr_sum > 0 else 0.0

            iterator.set_description(
                'F-score: ' + str(np.mean(all_accuracy)) + '| Cumulative F-score: '
                + str(cumulative_f_score))

        torch.cuda.empty_cache()

    return np.mean(all_accuracy)