def fit(self, train_x, train_y, test_x, test_y):
    """Train the model and report validation metrics on the test split.

    If no model has been built yet, a warning is printed and `self.build()`
    is invoked before training starts.

    Args:
        train_x: Training inputs, passed to the model as `x`.
        train_y: Training targets, passed to the model as `y`.
        test_x: Validation inputs.
        test_y: Validation targets.

    Returns:
        Whatever `self.model.fit` returns (the training history).
    """
    model_missing = self.model is None
    if model_missing:
        Printer.warning("Model was automatically built when fitting.")
        self.build()

    # Collect the training options in one place before handing them over.
    fit_options = {
        "x": train_x,
        "y": train_y,
        "batch_size": self.batch_size,
        "epochs": self.n_epochs,
        "validation_data": (test_x, test_y),
    }
    return self.model.fit(**fit_options)
def load(self, output_dir="output"):
    """Load the dataset into memory, using an on-disk cache when available.

    The cache file is `self.cache_file_name` inside `output_dir`. When it
    exists, the images and labels are unpickled from it. Otherwise every
    path returned by `self._get_image_paths()` is run through
    `self._process_image()`, and the name of the directory containing each
    image is used as its label. The freshly loaded arrays are written to
    the cache unless the image array is larger than 1 GiB.

    The results are stored on the instance rather than returned:
    `self.images[i]` is the i-th processed image (as a float array) and
    `self.labels[i]` is its label.

    Args:
        output_dir: Directory that holds (or will hold) the cache file.
            It is created if it does not exist when the cache is written.

    Returns:
        None.
    """
    cache_file = os.path.join(output_dir, self.cache_file_name)
    Printer.information("Searching for cache file: " + cache_file)

    if os.path.exists(cache_file):
        Printer.information("Cache file found. Loading from cache.")
        # NOTE: unpickling can execute arbitrary code; only load cache
        # files that this program wrote itself.
        with open(cache_file, 'rb') as fr:
            images, labels = pickle.load(fr)
    else:
        Printer.warning("Cache file not found")
        Printer.information("Started loading dataset")
        images = []
        labels = []
        image_paths = self._get_image_paths()
        total = len(image_paths)
        # Count from 1 so the progress message ends at "total/total".
        for count, image_path in enumerate(image_paths, start=1):
            image = self._process_image(image_path)
            # The parent directory name is the label.
            label = image_path.split(os.path.sep)[-2]
            images.append(image)
            labels.append(label)
            Printer.processing(f"Loaded {count}/{total} images.")
        Printer.end_processing()

        images = np.array(images, dtype='float')
        labels = np.array(labels)

        # Skip caching for datasets above 1 GiB to avoid huge pickle files.
        if sys.getsizeof(images) <= 1024 * 1024 * 1024:
            # Make sure the target directory exists; otherwise the write
            # would fail after all the loading work has been done.
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)
            with open(cache_file, 'wb') as fw:
                pickle.dump((images, labels), fw)

    Printer.information("Dataset loaded into memory")
    self.images = images
    self.labels = labels
def fit(self, train_x, train_y, test_x, test_y):
    """Train the model on randomly augmented batches of the training data.

    If no model has been built yet, a warning is printed and `self.build()`
    is invoked first. Training batches come from an `ImageDataGenerator`
    that applies random rotation, shifts, shear, zoom and horizontal flips;
    the validation data is passed through unaugmented.

    Args:
        train_x: Training images fed to the augmenting generator.
        train_y: Training targets.
        test_x: Validation inputs.
        test_y: Validation targets.

    Returns:
        The history object returned by the Keras training call.
    """
    if self.model is None:
        Printer.warning("Model was automatically built when fitting.")
        self.build()

    data_augmenter = ImageDataGenerator(rotation_range=30,
                                        width_shift_range=0.1,
                                        height_shift_range=0.1,
                                        shear_range=0.2,
                                        zoom_range=0.2,
                                        horizontal_flip=True,
                                        fill_mode='nearest')

    # Floor division yields 0 when the training set is smaller than one
    # batch; always run at least one step per epoch.
    steps_per_epoch = max(1, len(train_x) // self.batch_size)
    batches = data_augmenter.flow(train_x, train_y,
                                  batch_size=self.batch_size)

    # `fit_generator` is deprecated and absent from newer Keras releases,
    # where `fit` accepts generators directly. Prefer it when present so
    # older installations behave exactly as before.
    train = getattr(self.model, "fit_generator", None)
    if train is None:
        train = self.model.fit

    history = train(batches,
                    validation_data=(test_x, test_y),
                    steps_per_epoch=steps_per_epoch,
                    epochs=self.n_epochs)
    return history
def _get_image_paths(self):
    """Collect, shuffle and return the image file paths under `self.base_path`.

    The directory tree is walked recursively. A file is skipped, with a
    warning, when it has no extension, when its extension is not listed in
    `self.image_extensions`, or when its directory path contains '.ipynb'.
    The surviving paths are shuffled in place before being returned.
    """
    collected = []
    for folder, _, names in os.walk(self.base_path):
        for name in names:
            # Text after the last separator is the extension; an empty
            # separator means the name contains no extension at all.
            _, separator, suffix = name.rpartition(os.extsep)
            if not separator:
                Printer.warning(
                    "Files without extension found: {}".format(name))
            elif suffix not in self.image_extensions:
                Printer.warning(
                    "Non-image files found: {}".format(name))
            elif '.ipynb' in folder:
                Printer.warning("IPyNb caches found: {}".format(name))
            else:
                collected.append(os.path.join(folder, name))

    random.shuffle(collected)
    Printer.information(f"Found {len(collected)} images")
    return collected