Example #1
    def dump(self, path):
        """Dumps a deep copy of the class instance as a serialized dill file

        Args:
            path (str): dumping path
        """
        buffer = copy.deepcopy(self)
        io.save_dill(path, buffer)
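Together with the classmethod-style load in Example #12 below, this gives a simple persistence round trip. A usage sketch (the class name and paths are hypothetical):

    trainer = Trainer()                               # hypothetical class exposing dump/load
    trainer.dump("sessions/trainer.dill")
    restored = Trainer.load("sessions/trainer.dill")  # raises TypeError if the file holds another class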
Example #2
    def __init__(self, root, transform=None, target_transform=None):
        """Loads the dataset index and the per-scrapset type mappings from root."""
        self.idx2key = io.load_json(os.path.join(root, "index.json"))
        self.types = dict()
        self.root_dir = root
        for scrapset in ["artworks", "pokebip"]:
            self.types.update(
                **io.load_json(os.path.join(root, scrapset, "types.json")))
        self.transform = transform
        self.target_transform = target_transform
Example #3
    def __init__(self):
        """
        INIT
        """
        self.class_matrix = np.zeros(9, dtype=np.int32)

        self.io_handler = IOHandler()

        self.total = 0
Example #4
def human_baseline(gold_dataset_dir: str, annotation_path: str):
    """
    Shows the images from the dataset and asks the human to label them.
    :param str gold_dataset_dir: The directory of the gold dataset
    :param str annotation_path: The path to the annotation file, if it exists we will resume the labeling session
    """
    files = glob.glob(os.path.join(gold_dataset_dir, "*.jpeg"))
    io_handler = IOHandler()
    input_dictionary = restore_dictionary(annotation_path=annotation_path)

    try:
        pbar_desc = (
            "-1"
            if input_dictionary["total"] == 0
            else f"Current human accuracy: {round((input_dictionary['correct']/input_dictionary['total'])*100,2)}%"
        )
        with tqdm(total=len(files) - input_dictionary["total"], desc=pbar_desc) as pbar:
            for image_name in files:
                metadata = os.path.basename(image_name)[:-5]
                header, values = metadata.split("%")
                image_no = int(header[1:])
                if image_no not in input_dictionary["human_predictions"]:
                    gold_key = io_handler.imagename_input_conversion(
                        image_name=image_name, output_type="keyboard"
                    )

                    # image = io.imread(image_name)
                    os.system(f"xv {image_name} &")
                    # cv2.imshow("window1", img_as_ubyte(image))
                    # cv2.waitKey(1)
                    user_key = keys_to_id(input("Push the keys: "))

                    input_dictionary["human_predictions"][image_no] = user_key
                    input_dictionary["total"] += 1
                    if user_key == gold_key:
                        input_dictionary["correct"] += 1

                    pbar.update(1)

                    pbar.set_description(
                        f"Current human accuracy: {round((input_dictionary['correct']/input_dictionary['total'])*100,2)}%"
                    )

                    if input_dictionary["total"] % 20 == 0:
                        with open(
                            annotation_path, "w+", encoding="utf8"
                        ) as annotation_file:
                            json.dump(input_dictionary, annotation_file)

    except KeyboardInterrupt:
        with open(annotation_path, "w+", encoding="utf8") as annotation_file:
            json.dump(input_dictionary, annotation_file)

    with open(annotation_path, "w+", encoding="utf8") as annotation_file:
        json.dump(input_dictionary, annotation_file)
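restore_dictionary is not shown on this page; a minimal sketch consistent with how human_baseline uses it (resuming counts and per-image predictions from the JSON annotation file if it exists) could look like this — note the key conversion, since json.dump stores the integer image ids as strings:

    def restore_dictionary(annotation_path: str) -> dict:
        """Resume a labeling session from annotation_path, or start a fresh one (sketch)."""
        if os.path.exists(annotation_path):
            with open(annotation_path, "r", encoding="utf8") as annotation_file:
                dictionary = json.load(annotation_file)
            # JSON object keys are strings; restore the integer image ids
            dictionary["human_predictions"] = {
                int(k): v for k, v in dictionary["human_predictions"].items()
            }
            return dictionary
        return {"total": 0, "correct": 0, "human_predictions": {}}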
Example #5
    def serialize(self, path=None):
        """Dumps the object dictionary as a serialized pickle file

        Args:
            path (str): dumping path
        """
        if not path:
            path = os.path.join(self.session_dir, ConfigFile.pickle_filename)
        attributes = self.__dict__.copy()
        # The optimizer itself is not pickled; keep its class and config instead
        del attributes["optimizer"]
        attributes.update({
            "optimizer_class": self.optimizer.__class__,
            "optimizer_config": self.optimizer.get_config()
        })
        io.save_pickle(path, attributes)
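The optimizer is removed from the attribute dict because optimizer objects are generally not safely picklable; persisting the class together with its get_config() output and rebuilding at load time is the usual workaround. Example #13 below performs exactly this rebuild; the core of it is:

    attributes = io.load_pickle(path)
    optimizer = attributes["optimizer_class"](**attributes["optimizer_config"])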
Example #6
    def test_sort_shakespeare(self):
        shakespeare = IOHandler(
            '../../Assets/Files/shakespeare-complete-works.txt')
        actual, expected = shakespeare.words, shakespeare.sorted_words
        self.assertTrue(len(actual) > 0)
        with self.assertRaises(RecursionError):
            actual = self.alg(actual)
Example #7
    def setup_session(self, overwrite=False, timestamp=False):
        """Sets up training session directory

        Args:
            overwrite (bool): if True, overwrites existing directory (default: False)
            timestamp (bool): if True, adds timestamp to directory name (default: False)
        """
        session_name = self.session_name
        if timestamp:
            session_name = session_name + "_" + time.strftime("%Y%m%d-%H%M%S")
        io.mkdir(session_name, ConfigFile.bin_dir, overwrite)
        session_dir = os.path.join(ConfigFile.bin_dir, session_name)
        io.mkdir(ConfigFile.checkpoints_dirname, session_dir)
        io.mkdir(ConfigFile.tensorboard_dirname, session_dir)
        io.mkdir(ConfigFile.observations_dirname, session_dir)
        io.mkdir(ConfigFile.scores_dirname, session_dir)
        ConfigFile._write_gitignore(session_dir)
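Assuming io.mkdir(name, parent, ...) creates parent/name, setup_session produces roughly this layout (directory names come from the ConfigFile constants):

    bin_dir/
        <session_name>[_YYYYmmdd-HHMMSS]/
            .gitignore
            <checkpoints_dirname>/
            <tensorboard_dirname>/
            <observations_dirname>/
            <scores_dirname>/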
Example #8
    def __init__(
        self,
        dataset_dir: str,
        hide_map_prob: float,
        dropout_images_prob: List[float],
        train: bool = False,
    ):
        """
        INIT

        :param str dataset_dir: The directory of the dataset.
        :param float hide_map_prob: Probability of hiding the minimap (0<=hide_map_prob<=1)
        :param List[float] dropout_images_prob: Probabilities of dropping each image (0<=dropout_images_prob<=1)
        :param bool train: If True, the dataset is used for training.
        """

        self.dataset_dir = dataset_dir
        self.hide_map_prob = hide_map_prob
        self.dropout_images_prob = dropout_images_prob

        assert 0 <= hide_map_prob <= 1.0, (
            f"hide_map_prob not in 0 <= hide_map_prob <= 1.0 range. "
            f"hide_map_prob: {hide_map_prob}")

        assert len(dropout_images_prob) == 5, (
            f"dropout_images_prob must have 5 probabilities, one for each image in the sequence. "
            f"dropout_images_prob len: {len(dropout_images_prob)}")

        for dropout_image_prob in dropout_images_prob:
            assert 0 <= dropout_image_prob <= 1.0, (
                f"All probabilities in dropout_image_prob must be in the range 0 <= dropout_image_prob <= 1.0. "
                f"dropout_images_prob: {dropout_images_prob}")

        if train:
            self.transform = transforms.Compose([
                RemoveMinimap(hide_map_prob=hide_map_prob),
                RemoveImage(dropout_images_prob=dropout_images_prob),
                SplitImages(),
                ToTensor(),
                SequenceColorJitter(),
                Normalize(),
                MergeImages(),
                ReOrderImages(),
            ])
        else:
            self.transform = transforms.Compose([
                RemoveMinimap(hide_map_prob=hide_map_prob),
                # RemoveImage(dropout_images_prob=dropout_images_prob),
                SplitImages(),
                ToTensor(),
                # SequenceColorJitter(),
                Normalize(),
                MergeImages(),
                ReOrderImages(),
            ])
        self.dataset_files = glob.glob(os.path.join(dataset_dir, "*.jpeg"))

        self.IOHandler = IOHandler()
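The train and eval pipelines above differ only in the stochastic augmentation steps. A compact equivalent, as a sketch using the same transform names:

    steps = [RemoveMinimap(hide_map_prob=hide_map_prob)]
    if train:
        steps.append(RemoveImage(dropout_images_prob=dropout_images_prob))
    steps += [SplitImages(), ToTensor()]
    if train:
        steps.append(SequenceColorJitter())
    steps += [Normalize(), MergeImages(), ReOrderImages()]
    self.transform = transforms.Compose(steps)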
Example #9
class BalancedDataset:
    """
    Generate a dataset of images with balanced classes.
    """

    class_matrix: np.ndarray
    io_handler: IOHandler
    total: int

    def __init__(self):
        """
        INIT
        """
        self.class_matrix = np.zeros(9, dtype=np.int32)

        self.io_handler = IOHandler()

        self.total = 0

    def balance_dataset(self, input_value: Union[np.ndarray, int]) -> bool:
        """
        Decide if a given input value is to be added to the dataset or not.
        The acceptance probability depends on the current class balance: the more
        examples a given class already has, the lower the probability of returning
        True for new examples of that class. Xbox controller inputs are mapped to keys.

        :param Union[np.ndarray, int] input_value: The controller input value of the example being considered.
        :return: True if the example is to be added to the dataset, False otherwise.
        """

        example_class = self.io_handler.input_conversion(
            input_value=input_value, output_type="keyboard")

        if self.total != 0:
            prop: float = ((self.total - self.class_matrix[example_class]) /
                           self.total)**2
            if prop <= 0.7:
                prop = 0.1

            if np.random.rand() <= prop:
                self.class_matrix[example_class] += 1
                self.total += 1
                return True
            else:
                return False
        else:
            self.class_matrix[example_class] += 1
            self.total += 1
            return True

    @property
    def get_matrix(self) -> np.ndarray:
        """
        Get the class matrix.

        :return: The class matrix.
        """
        return self.class_matrix
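Each candidate is accepted with probability ((total - class_count) / total)**2, dropped to 0.1 whenever that value is 0.7 or below, so heavily represented classes are sampled less often. A usage sketch of the rejection loop (recorded_inputs is hypothetical):

    balancer = BalancedDataset()
    kept = []
    for value in recorded_inputs:  # hypothetical iterable of controller inputs
        if balancer.balance_dataset(value):
            kept.append(value)
    print(balancer.get_matrix)     # per-class counts of the accepted examples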
Example #10
    def load(self, path):
        """Loads builder attributes

        Args:
            path (str): file path
        """
        kwargs = io.load_pickle(path)
        del kwargs['ndims_']
        self.__init__(**kwargs)
Example #11
    def test_sort_shakespeare(self):
        shakespeare = IOHandler(
            '../../Assets/Files/shakespeare-complete-works.txt')
        actual, expected = shakespeare.words, shakespeare.sorted_words
        self.assertTrue(len(actual) > 0)
        actual = self.alg(actual)
        self.assertEqual(expected, actual)
        self.assertCountEqual(expected, actual)
        self.assertSequenceEqual(expected, actual)
Example #12
    @classmethod
    def load(cls, path):
        """Loads serialized file to initialize class instance

        Args:
            path (str): Path to file
        """
        buffer = io.load_dill(path)
        if not isinstance(buffer, cls):
            raise TypeError("Loaded serialized file is not of proper class")
        return copy.deepcopy(buffer)
Example #13
    def load(self, path):
        """Loads serialized file to initialize ConfigFile instance

        Args:
            path (str): path to file
        """
        kwargs = io.load_pickle(path)
        kwargs["optimizer"] = kwargs["optimizer_class"](
            **kwargs["optimizer_config"])
        del kwargs["session_dir"], kwargs["optimizer_class"], kwargs[
            "optimizer_config"]
        self.__init__(**kwargs)
Example #14
    def load(self, path):
        """Loads builder attributes

        Args:
            path (str): file path
        """
        kwargs = io.load_pickle(path)
        del kwargs['ndims_']
        if "use_segmentation" not in kwargs.keys():
            # TODO : write a more generic way to set nonexistent attributes to defaults
            kwargs["use_segmentation"] = False
        self.__init__(**kwargs)
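The TODO above can be handled generically with a table of defaults for attributes added after older sessions were serialized; a sketch:

    DEFAULTS = {"use_segmentation": False}  # extend as new attributes appear
    for key, default in DEFAULTS.items():
        kwargs.setdefault(key, default)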
Example #15
    def fit(self,
            train_ids,
            val_ids,
            generator="luna",
            loop=True,
            shuffle=True,
            use_affine=False):
        """Trains model

        Args:
            train_ids (list): list of training scans ids
            val_ids (list): list of validation scans ids
            loop (boolean): If True, endlessly loops over the data (default: True)
            generator (str): generator type to use {"luna", "segmentation", "atlas", "atlas_seg"}
            shuffle (boolean): If True, scans are shuffled (default: True)
            use_affine (boolean): forwarded to the scan generators (default: False)
        """
        self.logger.verbose(f"Number of training scans : {len(train_ids)}\n")
        self.logger.verbose(f"Number of validation scans : {len(val_ids)}\n")
        pd.DataFrame(train_ids).to_csv(os.path.join(
            self.config.session_dir, LunaTrainer.train_ids_filename),
                                       index=False,
                                       header=False)
        pd.DataFrame(val_ids).to_csv(os.path.join(
            self.config.session_dir, LunaTrainer.val_ids_filename),
                                     index=False,
                                     header=False)

        (width, height, depth) = self.config.input_shape

        if generator == "luna":
            train_gen = gen.scan_generator(train_ids, width, height, depth,
                                           loop, shuffle, use_affine)
            val_gen = gen.scan_generator(val_ids, width, height, depth, loop,
                                         shuffle, use_affine)
        elif generator == "segmentation":
            train_gen = gen.scan_and_seg_generator(train_ids, width, height,
                                                   depth, loop, shuffle,
                                                   use_affine)
            val_gen = gen.scan_and_seg_generator(val_ids, width, height, depth,
                                                 loop, shuffle, use_affine)
        elif generator == "atlas":
            if not self.config.atlas_id:
                raise RuntimeError(
                    "Must specify an atlas id if using atlas registration")
            train_gen = gen.atlas_generator(self.config.atlas_id, train_ids,
                                            width, height, depth, loop,
                                            shuffle, use_affine)
            val_gen = gen.atlas_generator(self.config.atlas_id, val_ids, width,
                                          height, depth, loop, shuffle,
                                          use_affine)
        elif generator == "atlas_seg":
            if not self.config.atlas_id:
                raise RuntimeError(
                    "Must specify an atlas id if using atlas registration")
            train_gen = gen.atlas_seg_generator(self.config.atlas_id,
                                                train_ids, width, height,
                                                depth, loop, shuffle,
                                                use_affine)
            val_gen = gen.atlas_seg_generator(self.config.atlas_id, val_ids,
                                              width, height, depth, loop,
                                              shuffle, use_affine)
        else:
            raise ValueError(f"Unknown generator specified: {generator}")

        self.logger.verbose("Compiling model :\n")
        self.logger.verbose(f"\t - Generator : {generator}\n")
        self.logger.verbose(
            f"\t - Optimizer : {self.config.optimizer.__dict__}\n")
        self.logger.verbose(f"\t - Losses : {self.config.losses}\n")
        self.logger.verbose(
            f"\t - Losses weights : {self.config.loss_weights}\n")
        self.logger.verbose(f"\t - Callbacks : {self.config.callbacks}\n")
        self.model_.compile(optimizer=self.config.optimizer,
                            loss=self.config.losses,
                            loss_weights=self.config.loss_weights,
                            metrics=self.config.metrics)

        self.logger.verbose("******** Initiating training *********")
        validation_steps = max(int(0.2 * self.config.steps_per_epoch), 1)

        with tf.device(self.device_):
            training_loss = self.model_.fit_generator(
                generator=train_gen,
                initial_epoch=self.config.initial_epoch,
                epochs=self.config.epochs,
                callbacks=self.config.callbacks,
                steps_per_epoch=self.config.steps_per_epoch,
                verbose=self.verbose,
                validation_data=val_gen,
                validation_steps=validation_steps)

        io.save_json(os.path.join(self.main_dir_, "training_history.json"),
                     training_loss.history)
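fit_generator is the legacy Keras API; in TensorFlow 2, Model.fit accepts generators directly, so an equivalent call would be (sketch, same arguments):

    training_loss = self.model_.fit(
        train_gen,
        initial_epoch=self.config.initial_epoch,
        epochs=self.config.epochs,
        callbacks=self.config.callbacks,
        steps_per_epoch=self.config.steps_per_epoch,
        verbose=self.verbose,
        validation_data=val_gen,
        validation_steps=validation_steps)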
Example #16
    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}  # Keras passes logs=None; also avoids a mutable default argument
        observations_subdir_format = ConfigFile.observations_subdir_format.format(
            epoch=epoch + 1, **logs)
        src_filepath = self.src_filepath.format(epoch=epoch + 1, **logs)
        tgt_filepath = self.tgt_filepath.format(epoch=epoch + 1, **logs)
        pred_filepath = self.pred_filepath.format(epoch=epoch + 1, **logs)
        grad_x_filepath = self.grad_x_filepath.format(epoch=epoch + 1, **logs)
        grad_y_filepath = self.grad_y_filepath.format(epoch=epoch + 1, **logs)
        pred_seg_filepath = self.pred_seg_filepath.format(epoch=epoch + 1,
                                                          **logs)

        observations_dir = os.path.join(self.session_dir,
                                        ConfigFile.observations_dirname)
        io.mkdir(observations_subdir_format, observations_dir)

        src_gen = loader.preprocess_scans([self.src_id], *self.input_shape)
        tgt_gen = loader.preprocess_scans([self.tgt_id], *self.input_shape)
        src = next(src_gen)[0][np.newaxis, :, :, :, np.newaxis]
        tgt = next(tgt_gen)[0][np.newaxis, :, :, :, np.newaxis]

        if self.use_segmentation:
            tgt_seg_gen = loader.preprocess_segmentations([self.tgt_id],
                                                          *self.input_shape)
            tgt_seg = next(tgt_seg_gen)[0][np.newaxis, :, :, :, np.newaxis]
            output = self.model.predict([src, tgt, tgt_seg])
        else:
            output = self.model.predict([src, tgt])

        fig, _ = handler.display_n_slices(src.squeeze(), n=4, return_fig=True)
        fig.savefig(
            os.path.join(observations_dir, observations_subdir_format,
                         src_filepath))
        plt.close()
        fig, _ = handler.display_n_slices(tgt.squeeze(), n=4, return_fig=True)
        fig.savefig(
            os.path.join(observations_dir, observations_subdir_format,
                         tgt_filepath))
        plt.close()
        fig, _ = handler.display_n_slices(output[0].squeeze(),
                                          n=4,
                                          return_fig=True)
        fig.savefig(
            os.path.join(observations_dir, observations_subdir_format,
                         pred_filepath))
        plt.close()
        fig, _ = handler.display_n_slices(output[1].squeeze()[:, :, :, 0],
                                          n=4,
                                          return_fig=True)
        fig.savefig(
            os.path.join(observations_dir, observations_subdir_format,
                         grad_x_filepath))
        plt.close()
        fig, _ = handler.display_n_slices(output[1].squeeze()[:, :, :, 1],
                                          n=4,
                                          return_fig=True)
        fig.savefig(
            os.path.join(observations_dir, observations_subdir_format,
                         grad_y_filepath))
        plt.close()
        if self.use_segmentation:
            fig, _ = handler.display_n_slices(output[2].squeeze(),
                                              n=4,
                                              return_fig=True)
            fig.savefig(
                os.path.join(observations_dir, observations_subdir_format,
                             pred_seg_filepath))
            plt.close()
        del fig, _
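The five (or six) display/savefig blocks above follow one pattern and could be folded into a loop without changing behavior; a sketch:

    panels = [
        (src.squeeze(), src_filepath),
        (tgt.squeeze(), tgt_filepath),
        (output[0].squeeze(), pred_filepath),
        (output[1].squeeze()[:, :, :, 0], grad_x_filepath),
        (output[1].squeeze()[:, :, :, 1], grad_y_filepath),
    ]
    if self.use_segmentation:
        panels.append((output[2].squeeze(), pred_seg_filepath))
    for volume, filename in panels:
        fig, _ = handler.display_n_slices(volume, n=4, return_fig=True)
        fig.savefig(os.path.join(observations_dir, observations_subdir_format, filename))
        plt.close()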
Example #17
    def test_sort_shakespeare(self):
        words = IOHandler(
            '../../Assets/Files/shakespeare-complete-works.txt').words
        self.assertTrue(len(words) > 0)
        with self.assertRaises(TimeoutError):
            self.alg(words)
Example #18
class Tedd1104Dataset(Dataset):
    """TEDD1104 dataset."""
    def __init__(
        self,
        dataset_dir: str,
        hide_map_prob: float,
        dropout_images_prob: List[float],
        control_mode: str = "keyboard",
        train: bool = False,
    ):
        """
        INIT

        :param str dataset_dir: The directory of the dataset.
        :param float hide_map_prob: Probability of hiding the minimap (0<=hide_map_prob<=1)
        :param List[float] dropout_images_prob: Probabilities of dropping each image (0<=dropout_images_prob<=1)
        :param str control_mode: Type of the user input: "keyboard" or "controller"
        :param bool train: If True, the dataset is used for training.
        """

        self.dataset_dir = dataset_dir
        self.hide_map_prob = hide_map_prob
        self.dropout_images_prob = dropout_images_prob
        self.control_mode = control_mode.lower()

        assert self.control_mode in [
            "keyboard",
            "controller",
        ], f"{self.control_mode} control mode not supported. Supported dataset types: [keyboard, controller].  "

        assert 0 <= hide_map_prob <= 1.0, (
            f"hide_map_prob not in 0 <= hide_map_prob <= 1.0 range. "
            f"hide_map_prob: {hide_map_prob}")

        assert len(dropout_images_prob) == 5, (
            f"dropout_images_prob must have 5 probabilities, one for each image in the sequence. "
            f"dropout_images_prob len: {len(dropout_images_prob)}")

        for dropout_image_prob in dropout_images_prob:
            assert 0 <= dropout_image_prob <= 1.0, (
                f"All probabilities in dropout_image_prob must be in the range 0 <= dropout_image_prob <= 1.0. "
                f"dropout_images_prob: {dropout_images_prob}")

        if train:
            self.transform = transforms.Compose([
                RemoveMinimap(hide_map_prob=hide_map_prob),
                RemoveImage(dropout_images_prob=dropout_images_prob),
                SplitImages(),
                ToTensor(),
                SequenceColorJitter(),
                Normalize(),
                MergeImages(),
            ])
        else:
            self.transform = transforms.Compose([
                # RemoveMinimap(hide_map_prob=hide_map_prob),
                # RemoveImage(dropout_images_prob=dropout_images_prob),
                SplitImages(),
                ToTensor(),
                # SequenceColorJitter(),
                Normalize(),
                MergeImages(),
            ])

        self.dataset_files = glob.glob(os.path.join(dataset_dir, "*.jpeg"))

        self.IOHandler = IOHandler()

    def __len__(self):
        """
        Returns the length of the dataset.

        :return: int - Length of the dataset.
        """
        return len(self.dataset_files)

    def __getitem__(self, idx):
        """
        Returns a sample from the dataset.

        :param int idx: Index of the sample.
        :return: Dict[str, torch.Tensor] - Transformed sequence of images
        """
        if torch.is_tensor(idx):
            idx = int(idx)

        img_name = self.dataset_files[idx]
        image = None
        while image is None:
            try:
                image = io.imread(img_name)
            except (ValueError, FileNotFoundError) as err:
                error_message = str(err).split("\n")[-1]
                print(
                    f"Error reading image: {img_name} probably a corrupted file.\n"
                    f"Exception: {error_message}\n"
                    f"We will load a random image instead.")
                img_name = self.dataset_files[int(
                    len(self.dataset_files) * torch.rand(1))]

        y = self.IOHandler.imagename_input_conversion(
            image_name=img_name,
            output_type=self.control_mode,
        )

        sample = {"image": image, "y": y}

        return self.transform(sample)
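A usage sketch with a standard PyTorch DataLoader (paths and batch size hypothetical; assumes the transform pipeline preserves the "image" and "y" keys of the sample dict):

    from torch.utils.data import DataLoader

    dataset = Tedd1104Dataset(
        dataset_dir="data/train",                       # hypothetical path
        hide_map_prob=0.3,
        dropout_images_prob=[0.1, 0.1, 0.1, 0.1, 0.0],
        control_mode="keyboard",
        train=True,
    )
    loader = DataLoader(dataset, batch_size=8, shuffle=True, num_workers=4)
    for batch in loader:
        images, y = batch["image"], batch["y"]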