Example #1
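This and the following examples omit their imports. A plausible header, assuming the usual third-party libraries referenced below; the project-local helpers (config, generate_word_bbox, get_weighted_character_target, calculate_fscore, generate_target, generate_affinity, generate_target_others, denormalize_mean_variance, normalize_mean_variance, resize, resize_generated) are assumed to come from the repository's own modules:

import json
import os

import cv2
import numpy as np
import matplotlib.pyplot as plt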
def generate_next_targets(original_dim, output, image, base_target_path,
                          image_name, annots, dataloader, no):
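    """
    Map one model prediction back to the original image resolution, derive word-level
    boxes from the character and affinity heatmaps, blend them with the ground-truth
    annotations to build the targets for the next weak-supervision iteration,
    optionally save visualizations, dump the targets as JSON and return the f-score
    between the predicted and the corrected word boxes.
    """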
    if 'datapile' in config.dataset_name:
        image_name = image_name.split('/')[-1]
    # visualize = config.visualize_generated and no % config.visualize_freq == 0 and no != 0
    visualize = config.visualize_generated  # Just for debugging
    max_dim = original_dim.max()
    resizing_factor = 768 / max_dim
    before_pad_dim = [
        int(original_dim[0] * resizing_factor),
        int(original_dim[1] * resizing_factor)
    ]

    output = np.uint8(output * 255)

    height_pad = (768 - before_pad_dim[0]) // 2
    width_pad = (768 - before_pad_dim[1]) // 2

    character_bbox = cv2.resize(
        output[0, height_pad:height_pad + before_pad_dim[0],
               width_pad:width_pad + before_pad_dim[1]],
        (original_dim[1] // 2, original_dim[0] // 2)) / 255

    affinity_bbox = cv2.resize(
        output[1, height_pad:height_pad + before_pad_dim[0],
               width_pad:width_pad + before_pad_dim[1]],
        (original_dim[1] // 2, original_dim[0] // 2)) / 255

    # Generating word-bbox given character and affinity heatmap

    generated_targets = generate_word_bbox(
        character_bbox,
        affinity_bbox,
        character_threshold=config.threshold_character,
        affinity_threshold=config.threshold_affinity,
        word_threshold=config.threshold_word,
        character_threshold_upper=config.threshold_character_upper,
        affinity_threshold_upper=config.threshold_affinity_upper,
        scaling_character=config.scale_character,
        scaling_affinity=config.scale_affinity)
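    # The heatmaps above were produced at half the original resolution, so every
    # generated coordinate below is scaled back up by a factor of 2.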

    generated_targets['word_bbox'] = generated_targets['word_bbox'] * 2
    generated_targets['characters'] = [
        i * 2 for i in generated_targets['characters']
    ]
    generated_targets['affinity'] = [
        i * 2 for i in generated_targets['affinity']
    ]

    if visualize:

        character_bbox = cv2.resize((character_bbox * 255).astype(np.uint8),
                                    (original_dim[1], original_dim[0])) / 255

        affinity_bbox = cv2.resize((affinity_bbox * 255).astype(np.uint8),
                                   (original_dim[1], original_dim[0])) / 255

        image_i = denormalize_mean_variance(image.data.cpu().numpy().transpose(
            1, 2, 0))

        image_i = cv2.resize(
            image_i[height_pad:height_pad + before_pad_dim[0],
                    width_pad:width_pad + before_pad_dim[1]],
            (original_dim[1], original_dim[0]))

        # Saving affinity heat map
        plt.imsave(base_target_path + '_predicted/affinity/' +
                   '.'.join(image_name.split('.')[:-1]) + '.png',
                   np.float32(affinity_bbox > config.threshold_affinity_upper),
                   cmap='gray')

        # Saving character heat map
        plt.imsave(
            base_target_path + '_predicted/character/' +
            '.'.join(image_name.split('.')[:-1]) + '.png',
            np.float32(character_bbox > config.threshold_character_upper),
            cmap='gray')

        cv2.drawContours(image_i, generated_targets['word_bbox'], -1,
                         (0, 255, 0), 2)

        # Saving word bbox drawn on the original image
        plt.imsave(
            base_target_path + '_predicted/word_bbox/' +
            '.'.join(image_name.split('.')[:-1]) + '.png', image_i)

    predicted_word_bbox = generated_targets['word_bbox'].copy()
    # --------------- PostProcessing for creating the targets for the next iteration ---------------- #
    generated_targets = get_weighted_character_target(
        generated_targets, {
            'bbox': annots['bbox'],
            'text': annots['text']
        }, dataloader.dataset.unknown, config.threshold_fscore,
        config.weight_threshold)
    target_word_bbox = generated_targets['word_bbox'].copy()

    f_score = calculate_fscore(
        predicted_word_bbox[:, :, 0, :],
        target_word_bbox[:, :, 0, :],
        text_target=annots['text'],
        unknown=dataloader.dataset.gt['unknown'])['f_score']

    if visualize:
        image_i = denormalize_mean_variance(image.data.cpu().numpy().transpose(
            1, 2, 0))
        image_i = cv2.resize(
            image_i[height_pad:height_pad + before_pad_dim[0],
                    width_pad:width_pad + before_pad_dim[1]],
            (original_dim[1], original_dim[0]))

        # Generated word_bbox after postprocessing
        cv2.drawContours(image_i, generated_targets['word_bbox'], -1,
                         (0, 255, 0), 2)

        # Saving word bbox after postprocessing
        plt.imsave(
            base_target_path + '_next_target/word_bbox/' +
            '.'.join(image_name.split('.')[:-1]) + '.png', image_i)

        # Generate affinity heatmap after postprocessing
        affinity_target, affinity_weight_map = generate_target_others(
            (image_i.shape[0], image_i.shape[1]),
            generated_targets['affinity'].copy(),
            np.array(generated_targets['weights'])[:, 1])

        # Generate character heatmap after postprocessing
        character_target, characters_weight_map = generate_target_others(
            (image_i.shape[0], image_i.shape[1]),
            generated_targets['characters'].copy(),
            np.array(generated_targets['weights'])[:, 0])

        # Saving the affinity heatmap
        plt.imsave(base_target_path + '_next_target/affinity/' +
                   '.'.join(image_name.split('.')[:-1]) + '.png',
                   affinity_target,
                   cmap='gray')

        # Saving the character heatmap
        plt.imsave(base_target_path + '_next_target/character/' +
                   '.'.join(image_name.split('.')[:-1]) + '.png',
                   character_target,
                   cmap='gray')

        # Saving the affinity weight map
        plt.imsave(base_target_path + '_next_target/affinity_weight/' +
                   '.'.join(image_name.split('.')[:-1]) + '.png',
                   affinity_weight_map,
                   cmap='gray')

        # Saving the character weight map
        plt.imsave(base_target_path + '_next_target/character_weight/' +
                   '.'.join(image_name.split('.')[:-1]) + '.png',
                   characters_weight_map,
                   cmap='gray')

    # Saving the target for next iteration in json format

    generated_targets['word_bbox'] = generated_targets['word_bbox'].tolist()
    generated_targets['characters'] = [
        word_i.tolist() for word_i in generated_targets['characters']
    ]
    generated_targets['affinity'] = [
        word_i.tolist() for word_i in generated_targets['affinity']
    ]

    with open(base_target_path + '/' + image_name + '.json', 'w') as f:
        json.dump(generated_targets, f)

    return f_score
Example #2
    def __getitem__(self, item_i):
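        """
        With probability config.prob_synth (training only), sample a Synth-Text image
        and build character/affinity heatmaps from its ground-truth character boxes;
        otherwise sample a dataset image and build weighted targets from the
        previously generated annotations.
        """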

        # noinspection PyArgumentList
        np.random.seed()
        check = np.random.uniform()

        if check < config.prob_synth and self.type_ == 'train':
            # probability of picking a Synth-Text image vs Image from dataset

            random_item = np.random.randint(len(self.imnames))

            character = self.charBB[random_item].copy()

            image = plt.imread(self.base_path_synth + '/' +
                               self.imnames[random_item][0])  # Read the image

            if len(image.shape) == 2:
                image = np.repeat(image[:, :, None], repeats=3, axis=2)
            elif image.shape[2] == 1:
                image = np.repeat(image, repeats=3, axis=2)
            else:
                image = image[:, :, 0:3]

            height, width, channel = image.shape
            image, character = resize(
                image, character)  # Resize the image to (768, 768)
            image = normalize_mean_variance(image).transpose(2, 0, 1)

            # Generate character heatmap with weights
            weight_character, weak_supervision_char = generate_target(
                image.shape, character.copy(), weight=1)

            # Generate affinity heatmap with weights
            weight_affinity, weak_supervision_affinity = generate_affinity(
                image.shape,
                character.copy(),
                self.txt[random_item].copy(),
                weight=1)

            dataset_name = 'SYNTH'
            text_target = ''

        else:

            random_item = np.random.randint(len(self.gt))
            image = plt.imread(self.base_path_other_images + '/' +
                               self.gt[random_item][0])  # Read the image

            if len(image.shape) == 2:
                image = np.repeat(image[:, :, None], repeats=3, axis=2)
            elif image.shape[2] == 1:
                image = np.repeat(image, repeats=3, axis=2)
            else:
                image = image[:, :, 0:3]

            height, width, channel = image.shape
            character = [
                np.array(word_i).reshape([len(word_i), 4, 1, 2])
                for word_i in self.gt[random_item][1]['characters'].copy()
            ]
            affinity = [
                np.array(word_i).reshape([len(word_i), 4, 1, 2])
                for word_i in self.gt[random_item][1]['affinity'].copy()
            ]

            assert len(character) == len(
                affinity), 'word length different in character and affinity'

            # Resize the image to (768, 768)
            image, character, affinity = resize_generated(
                image, character.copy(), affinity.copy())
            image = normalize_mean_variance(image).transpose(2, 0, 1)
            weights = [i for i in self.gt[random_item][1]['weights'].copy()]
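            # Word texts are joined into a single string with the '#@#@#@' delimiter
            # so they can be split back into individual words downstream.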
            text_target = '#@#@#@'.join(self.gt[random_item][1]['text'])

            assert len(self.gt[random_item][1]['text']) == len(self.gt[random_item][1]['word_bbox']), \
                'Length of word_bbox != Length of text'

            # assert len(text_target.split('#@#@#@')) == len(self.gt[random_item][1]['word_bbox']), \
            # 	'Some error in splitting'

            # Generate character heatmap with weights
            weight_character, weak_supervision_char = generate_target_others(
                image.shape, character.copy(), weights.copy())

            # Generate affinity heatmap with weights
            weight_affinity, weak_supervision_affinity = generate_target_others(
                image.shape, affinity.copy(), weights.copy())

            # Get original word_bbox annotations
            dataset_name = 'ICDAR'

        return \
         image.astype(np.float32), \
         weight_character.astype(np.float32), \
         weight_affinity.astype(np.float32), \
         weak_supervision_char.astype(np.float32), \
         weak_supervision_affinity.astype(np.float32), \
         dataset_name, \
         text_target, \
         random_item, \
         np.array([height, width])
Example #3
    def __getitem__(self, item_i):
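        """
        Same sampling scheme as Example #2, but Synth-Text samples are read from an
        HDF5-style raw_dataset (image plus charBB/txt attributes) and the non-synthetic
        branch uses the datapile annotations with per-word weights.
        """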
        height, width, channel = 0, 0, 0
        # noinspection PyArgumentList
        np.random.seed()
        check = np.random.uniform()

        if check < config.prob_synth and self.type_ == 'train':
            # probability of picking a Synth-Text image vs Image from dataset

            random_item = np.random.choice(self.imnames)
            sample = self.raw_dataset['data'][random_item]
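            # sample[()] reads the full image array; charBB and txt are stored as
            # attributes of the dataset entry (h5py-style storage is assumed here).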
            image = sample[()]
            charBB = sample.attrs['charBB']
            txt = [each.decode('utf-8') for each in sample.attrs['txt']]
            # print(txt)
            # Handle line-break
            all_words = []
            for line in txt:
                if '\n' in line:
                    all_words.extend(line.split('\n'))
                else:
                    all_words.append(line)
            # Remove blank word
            for index, line in enumerate(all_words):
                all_words[index] = [word for word in line.strip().split(' ')
                                    if word not in ['', ' ']]
            # Split word to char
            for index, line in enumerate(all_words):
                new_line = []
                for word in line:
                    if len(word) >= 2:
                        new_line.extend([char for char in word])
                    else:
                        new_line.append(word)
                all_words[index] = new_line
            # print('--------')
            # print(all_words)
            # print('--------')

            # Resize the image to (768, 768)
            image, character = resize(image, charBB.copy())
            image = normalize_mean_variance(image).transpose(2, 0, 1)
            # Generate character heatmap with weights
            weight_character, weak_supervision_char = generate_target(
                image.shape, character.copy(), weight=1)

            # Generate affinity heatmap with weights
            weight_affinity, weak_supervision_affinity = generate_affinity(
                image.shape, character.copy(),
                all_words.copy(),
                weight=1)

            dataset_name = 'SYNTH'
            text_target = ''

        else:

            random_item = np.random.randint(len(self.gt))
            image = plt.imread(os.path.join(
                self.base_path_other_images, self.gt[random_item][0]))  # Read the image

            if len(image.shape) == 2:
                image = np.repeat(image[:, :, None], repeats=3, axis=2)
            elif image.shape[2] == 1:
                image = np.repeat(image, repeats=3, axis=2)
            else:
                image = image[:, :, 0: 3]

            height, width, channel = image.shape
            character = [
                np.array(word_i).reshape([len(word_i), 4, 1, 2])
                for word_i in self.gt[random_item][1]['characters'].copy()
            ]
            affinity = [
                np.array(word_i).reshape([len(word_i), 4, 1, 2])
                for word_i in self.gt[random_item][1]['affinity'].copy()
            ]

            assert len(character) == len(
                affinity), 'word length different in character and affinity'

            # Resize the image to (768, 768)
            image, character, affinity = resize_generated(
                image, character.copy(), affinity.copy())
            image = normalize_mean_variance(image).transpose(2, 0, 1)
            weights = [i for i in self.gt[random_item][1]['weights'].copy()]
            text_target = '#@#@#@'.join(self.gt[random_item][1]['text'])

            assert len(self.gt[random_item][1]['text']) == len(self.gt[random_item][1]['word_bbox']), \
                'Length of word_bbox != Length of text'

            # assert len(text_target.split('#@#@#@')) == len(self.gt[random_item][1]['word_bbox']), \
            # 	'Some error in splitting'
            
            # Generate character heatmap with weights
            weight_character, weak_supervision_char = generate_target_others(
                image.shape, character.copy(), np.array(weights)[:, 0])

            # Generate affinity heatmap with weights
            weight_affinity, weak_supervision_affinity = generate_target_others(
                image.shape, affinity.copy(), np.array(weights)[:, 0])
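            # Note: column 0 of the per-word weights is used for both the character and
            # affinity targets here, whereas Example #1 uses column 1 for affinity.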

            # Get original word_bbox annotations
            dataset_name = 'datapile'


        return \
            image.astype(np.float32), \
            weight_character.astype(np.float32), \
            weight_affinity.astype(np.float32), \
            weak_supervision_char.astype(np.float32), \
            weak_supervision_affinity.astype(np.float32), \
            dataset_name, \
            text_target, \
            str(random_item), \
            np.array([height, width])