示例#1
0
    def fit(self, train_x, train_y, test_x, test_y):
        """Train `self.model` on the training split, validating on the test split.

        If no model has been built yet, a warning is printed and `self.build()`
        is called first.

        Args:
            train_x: Training inputs, forwarded as `x`.
            train_y: Training targets, forwarded as `y`.
            test_x: Validation inputs.
            test_y: Validation targets.

        Returns:
            Whatever `self.model.fit` returns (presumably a Keras `History`
            object -- confirm against the model type built by `build()`).
        """
        model_missing = self.model is None
        if model_missing:
            Printer.warning("Model was automatically built when fitting.")
            self.build()

        # Collect the training options first so the call itself stays short.
        fit_options = {
            "x": train_x,
            "y": train_y,
            "batch_size": self.batch_size,
            "epochs": self.n_epochs,
            "validation_data": (test_x, test_y),
        }
        return self.model.fit(**fit_options)
示例#2
0
    def load(self, output_dir="output"):
        """Load the dataset into memory, reusing a pickle cache when one exists.

        The cache file is `self.cache_file_name` inside `output_dir`. When it
        exists, the `(images, labels)` pair is unpickled from it. Otherwise
        every path returned by `self._get_image_paths()` is passed through
        `self._process_image`, labelled with the name of the directory that
        contains it, and the resulting arrays are written to the cache unless
        the image array is larger than 1 GiB.

        The original docstring states that processed images are normalized
        into the 0-1 range; that happens (if at all) inside
        `self._process_image` and cannot be confirmed from this method.

        Args:
            output_dir: Directory that holds the cache file. It is created
                when missing before the cache is written.

        Returns:
            The tuple `(images, labels)` where `images[i]` is the i'th
            processed image and `labels[i]` is its label. The same arrays are
            also stored on `self.images` and `self.labels`.
        """

        cache_file = os.path.join(output_dir, self.cache_file_name)
        Printer.information("Searching for cache file: " + cache_file)

        if os.path.exists(cache_file):
            Printer.information("Cache file found. Loading from cache.")
            # NOTE(review): unpickling executes arbitrary code embedded in the
            # file; only load cache files that this program wrote itself.
            with open(cache_file, 'rb') as fr:
                images, labels = pickle.load(fr)
        else:
            Printer.warning("Cache file not found")
            Printer.information("Started loading dataset")

            images = []
            labels = []

            image_paths = self._get_image_paths()
            total = len(image_paths)

            # Count from 1 so the progress line reads "1/N" ... "N/N" instead
            # of starting at "0/N" and never reaching the total.
            for count, image_path in enumerate(image_paths, start=1):
                image = self._process_image(image_path)
                # The label is the name of the directory containing the image.
                label = image_path.split(os.path.sep)[-2]

                images.append(image)
                labels.append(label)

                Printer.processing(f"Loaded {count}/{total} images.")

            Printer.end_processing()

            images = np.array(images, dtype='float')
            labels = np.array(labels)

            # Skip caching for arrays above 1 GiB. `nbytes` measures the data
            # buffer directly instead of relying on `sys.getsizeof`.
            max_cache_bytes = 1024 * 1024 * 1024
            if images.nbytes <= max_cache_bytes:
                # Make sure the target directory exists; otherwise `open`
                # raises FileNotFoundError on a fresh checkout.
                cache_dir = os.path.dirname(cache_file)
                if cache_dir:
                    os.makedirs(cache_dir, exist_ok=True)
                with open(cache_file, 'wb') as fw:
                    pickle.dump((images, labels), fw)

        Printer.information("Dataset loaded into memory")
        self.images = images
        self.labels = labels
        return images, labels
示例#3
0
    def fit(self, train_x, train_y, test_x, test_y):
        """Train `self.model` on randomly augmented batches of the training data.

        If no model has been built yet, a warning is printed and `self.build()`
        is called first. Training batches come from an `ImageDataGenerator`
        configured with random rotation, width/height shifts, shear, zoom and
        horizontal flips; the validation data is passed through unchanged.

        Args:
            train_x: Training images fed to the augmenting generator.
            train_y: Training targets matching `train_x`.
            test_x: Validation inputs.
            test_y: Validation targets.

        Returns:
            Whatever `self.model.fit_generator` returns (presumably a Keras
            `History` object -- confirm against the model type in use).
        """
        if self.model is None:
            Printer.warning("Model was automatically built when fitting.")
            self.build()

        data_augmenter = ImageDataGenerator(rotation_range=30,
                                            width_shift_range=0.1,
                                            height_shift_range=0.1,
                                            shear_range=0.2,
                                            zoom_range=0.2,
                                            horizontal_flip=True,
                                            fill_mode='nearest')

        # Floor division gives 0 when there are fewer samples than one batch,
        # which would request zero training steps per epoch; always run at
        # least one step.
        steps_per_epoch = max(1, len(train_x) // self.batch_size)

        # NOTE(review): `fit_generator` is deprecated in TensorFlow >= 2.1 in
        # favour of `fit`; kept here because the Keras version used by this
        # project is not visible from this method.
        history = self.model.fit_generator(
            data_augmenter.flow(train_x, train_y, batch_size=self.batch_size),
            validation_data=(test_x, test_y),
            steps_per_epoch=steps_per_epoch,
            epochs=self.n_epochs)
        return history
示例#4
0
    def _get_image_paths(self):
        """Collect the image files found under `self.base_path`, in random order.

        The directory tree is walked recursively. A file is skipped, with a
        warning, when its name contains no `os.extsep`, when the text after
        the last `os.extsep` is not in `self.image_extensions`, or when its
        directory path contains '.ipynb'.

        Returns:
            A shuffled list of paths, each built by joining the directory
            path with the file name.
        """

        image_paths = []
        for current_dir, _subdirs, names in os.walk(self.base_path):
            for name in names:
                # `rpartition` yields an empty separator when `os.extsep`
                # does not occur in the name at all.
                _stem, separator, suffix = name.rpartition(os.extsep)
                if not separator:
                    Printer.warning(
                        "Files without extension found: {}".format(name))
                elif suffix not in self.image_extensions:
                    Printer.warning(
                        "Non-image files found: {}".format(name))
                elif '.ipynb' in current_dir:
                    Printer.warning("IPyNb caches found: {}".format(name))
                else:
                    image_paths.append(os.path.join(current_dir, name))

        random.shuffle(image_paths)

        Printer.information(f"Found {len(image_paths)} images")
        return image_paths