def _generate_data(self, split: str) -> None:
        """Synthesize EMNIST-line images for `split` and append them to the HDF5 data file."""
        print(f"EMNISTLines2 generating data for {split}...")

        # pylint: disable=import-outside-toplevel
        from text_recognizer.data.sentence_generator import SentenceGenerator

        # Reserve two characters of budget for the start/end tokens added later.
        sentence_generator = SentenceGenerator(self.max_length - 2)

        emnist = self.emnist
        emnist.prepare_data()
        emnist.setup()

        # Train and val lines are both drawn from the trainval pool; everything
        # else falls through to the held-out test pool (matching original branching).
        if split == "train":
            source_x, source_y = emnist.x_trainval, emnist.y_trainval
            num = self.num_train
        elif split == "val":
            source_x, source_y = emnist.x_trainval, emnist.y_trainval
            num = self.num_val
        else:
            source_x, source_y = emnist.x_test, emnist.y_test
            num = self.num_test
        samples_by_char = get_samples_by_char(source_x, source_y, emnist.mapping)

        DATA_DIRNAME.mkdir(parents=True, exist_ok=True)
        with h5py.File(self.data_filename, "a") as f:
            x, y = create_dataset_of_images(
                num, samples_by_char, sentence_generator, self.min_overlap, self.max_overlap, self.dims
            )
            y = convert_strings_to_labels(y, emnist.inverse_mapping, length=MAX_OUTPUT_LENGTH)
            # "u1" (uint8) keeps the on-disk datasets compact; lzf is a fast compressor.
            f.create_dataset(f"x_{split}", data=x, dtype="u1", compression="lzf")
            f.create_dataset(f"y_{split}", data=y, dtype="u1", compression="lzf")
# Example 2 (score: 0)
    def setup(self, stage: str = None) -> None:
        """Build the synthetic-paragraph training dataset for the 'fit' stage (or when stage is None)."""
        print(f"IAMSyntheticParagraphs.setup({stage}): Loading trainval IAM paragraph regions and lines...")

        if stage not in ("fit", None):
            return

        crops, crop_labels = load_line_crops_and_labels("trainval", PROCESSED_DATA_DIRNAME)
        images, paragraph_strings = generate_synthetic_paragraphs(line_crops=crops, line_labels=crop_labels)
        targets = convert_strings_to_labels(
            strings=paragraph_strings, mapping=self.inverse_mapping, length=self.output_dims[0]
        )
        self.data_train = BaseDataset(
            images,
            targets,
            transform=get_transform(image_shape=self.dims[1:], augment=self.augment),  # type: ignore
        )
# Example 3 (score: 0)
 def _load_dataset(split: str, augment: bool) -> BaseDataset:
     """Load processed crops and labels for `split` and wrap them in a BaseDataset.

     NOTE(review): this function reads `self.inverse_mapping`, `self.output_dims`,
     and `self.dims` but takes no `self` parameter — it only works if it is defined
     in a scope where `self` is already bound (e.g. nested inside a method).
     Confirm against the enclosing definition.
     """
     crops, labels = load_processed_crops_and_labels(split)
     # Downscale every crop by the global scale factor before batching.
     X = [resize_image(crop, IMAGE_SCALE_FACTOR) for crop in crops]
     Y = convert_strings_to_labels(strings=labels,
                                   mapping=self.inverse_mapping,
                                   length=self.output_dims[0])
     transform = get_transform(image_shape=self.dims[1:],
                               augment=augment)  # type: ignore
     return BaseDataset(X, Y, transform=transform)
# Example 4 (score: 0)
    def setup(self, stage: str = None) -> None:
        """Load IAM line crops/labels and assemble train/val/test datasets.

        stage "fit" builds train+val from the trainval split; stage "test"
        builds the test set (without augmentation); stage None does both and
        additionally verifies the configured output dims against the labels.
        """
        with open(PROCESSED_DATA_DIRNAME / "_max_aspect_ratio.txt") as file:
            max_aspect_ratio = float(file.read())
            image_width = int(IMAGE_HEIGHT * max_aspect_ratio)
            assert image_width <= IMAGE_WIDTH

        if stage in ("fit", None):
            x_trainval, labels_trainval = load_line_crops_and_labels("trainval", PROCESSED_DATA_DIRNAME)
            # +2 accounts for the start/end tokens appended to each label.
            assert self.output_dims[0] >= max(len(label) for label in labels_trainval) + 2

            y_trainval = convert_strings_to_labels(
                labels_trainval, self.inverse_mapping, length=self.output_dims[0]
            )
            data_trainval = BaseDataset(
                x_trainval, y_trainval, transform=get_transform(IMAGE_WIDTH, self.augment)
            )
            self.data_train, self.data_val = split_dataset(
                base_dataset=data_trainval, fraction=TRAIN_FRAC, seed=42
            )

        # Note that test data does not go through augmentation transforms
        if stage in ("test", None):
            x_test, labels_test = load_line_crops_and_labels("test", PROCESSED_DATA_DIRNAME)
            assert self.output_dims[0] >= max(len(label) for label in labels_test) + 2

            y_test = convert_strings_to_labels(labels_test, self.inverse_mapping, length=self.output_dims[0])
            self.data_test = BaseDataset(x_test, y_test, transform=get_transform(IMAGE_WIDTH))

        if stage is None:
            self._verify_output_dims(labels_trainval=labels_trainval, labels_test=labels_test)
    def setup(self, stage: str = None):
        """Load processed IAM line images and labels and build train/val/test datasets.

        stage "fit" builds a reproducible train/val split from trainval; stage
        "test" builds the test set (no augmentation); stage None does both.

        Raises:
            RuntimeError: if the output dims implied by the labels on disk do
                not match ``self.output_dims``.
        """
        with open(PROCESSED_DATA_DIRNAME / '_max_aspect_ratio.txt') as file:
            max_aspect_ratio = float(file.read())
            image_width = int(IMAGE_HEIGHT * max_aspect_ratio)
            assert image_width <= IMAGE_WIDTH
        with open(PROCESSED_DATA_DIRNAME / 'trainval' / '_labels.json') as file:
            labels_trainval = json.load(file)
        with open(PROCESSED_DATA_DIRNAME / 'test' / '_labels.json') as file:
            labels_test = json.load(file)

        # Add 2 because of start and end tokens.
        max_label_length = max(len(label) for label in labels_trainval + labels_test) + 2
        output_dims = (max_label_length, 1)
        if output_dims != self.output_dims:
            # BUG FIX: the original raised with the undefined name `dims`,
            # which masked the intended error with a NameError. Report the
            # configured vs. computed dims instead.
            raise RuntimeError(self.output_dims, output_dims)

        def _load_split_images(split: str) -> list:
            """Load the split's PNG crops in ascending numeric-stem order."""
            filenames = sorted(
                (PROCESSED_DATA_DIRNAME / split).glob('*.png'),
                key=lambda filename: int(Path(filename).stem),
            )
            return [Image.open(filename) for filename in filenames]

        if stage == "fit" or stage is None:
            x_trainval = _load_split_images('trainval')
            y_trainval = convert_strings_to_labels(labels_trainval, self.inverse_mapping, length=self.output_dims[0])
            data_trainval = BaseDataset(x_trainval, y_trainval, transform=get_transform(IMAGE_WIDTH, self.augment))

            train_size = int(TRAIN_FRAC * len(data_trainval))
            val_size = len(data_trainval) - train_size
            # Fixed generator seed keeps the train/val split reproducible across runs.
            self.data_train, self.data_val = torch.utils.data.random_split(
                data_trainval, [train_size, val_size], generator=torch.Generator().manual_seed(42)
            )

        # Note that test data does not go through augmentation transforms
        if stage == "test" or stage is None:
            x_test = _load_split_images('test')
            y_test = convert_strings_to_labels(labels_test, self.inverse_mapping, length=self.output_dims[0])
            self.data_test = BaseDataset(x_test, y_test, transform=get_transform(IMAGE_WIDTH))