def test_image_data_generator_fit(self):
    """fit() accepts grayscale and RGB batches in both data formats."""
    # (data_format, axis at which the channel dimension is inserted)
    for data_format, channel_axis in (('channels_last', 3),
                                      ('channels_first', 1)):
        generator = preprocessing_image.ImageDataGenerator(
            featurewise_center=True,
            samplewise_center=True,
            featurewise_std_normalization=True,
            samplewise_std_normalization=True,
            zca_whitening=True,
            data_format=data_format)
        # Fit on grayscale (1 channel) first, then RGB (3 channels).
        for num_channels in (1, 3):
            shape = [32, 10, 10]
            shape.insert(channel_axis, num_channels)
            generator.fit(np.random.random(tuple(shape)))
def test_image_data_generator_invalid_data(self):
    """Invalid inputs and configs raise ValueError (or warn, for channels)."""
    generator = preprocessing_image.ImageDataGenerator(
        featurewise_center=True,
        samplewise_center=True,
        featurewise_std_normalization=True,
        samplewise_std_normalization=True,
        zca_whitening=True,
        data_format='channels_last')

    # fit() rejects rank-3 data.
    with self.assertRaises(ValueError):
        bad_samples = np.random.random((3, 10, 10))
        generator.fit(bad_samples)

    # flow() rejects a 1-D array.
    with self.assertRaises(ValueError):
        generator.flow(np.arange(5))

    # An unusual channel count (5) is accepted but raises a warning.
    odd_channel_samples = np.random.random((32, 10, 10, 5))
    generator.flow(odd_channel_samples)

    # Unknown data_format is rejected at construction time.
    with self.assertRaises(ValueError):
        generator = preprocessing_image.ImageDataGenerator(
            data_format='unknown')

    # A degenerate-but-valid zoom_range constructs without error.
    generator = preprocessing_image.ImageDataGenerator(zoom_range=(2, 2))
def get_batches(self, path, gen=None, shuffle=True, batch_size=8,
                class_mode='categorical', target_size=(224, 224)):
    '''
    Takes the path to a directory, and generates batches of
    augmented/normalized data. Batches are yielded indefinitely, in an
    infinite loop. Basically a wrapper around
    image.ImageDataGenerator().flow_from_directory().

    We utilise the default settings
    ImageDataGenerator(featurewise_center = False,
        samplewise_center = False,
        featurewise_std_normalization = False,
        samplewise_std_normalization = False,
        zca_whitening = False,
        zca_epsilon = 1e-6,
        rotation_range = 0.,
        width_shift_range = 0.,
        height_shift_range = 0.,
        shear_range = 0.,
        zoom_range = 0.,
        channel_shift_range = 0.,
        fill_mode = 'nearest',
        cval = 0.,
        horizontal_flip = False,
        vertical_flip = False,
        rescale = None,
        preprocessing_function = None,
        data_format = K.image_data_format())

    Args:
        path (str): Path to directory with images to flow from.
        gen (ImageDataGenerator): Initialised image.ImageDataGenerator()
            instance with arguments, if other than default values are
            wanted. A fresh default generator is created when omitted.
        shuffle (bool): Indicates whether or not the data should be
            shuffled. Default is True.
        batch_size (int): Size of the batches of data. Default is 8.
        class_mode (str): one of "categorical", "binary", "sparse" or
            None. Default: "categorical". Determines the type of label
            arrays that are returned: "categorical" will be 2D one-hot
            encoded labels, "binary" will be 1D binary labels, "sparse"
            will be 1D integer labels. If None, no labels are returned
            (the generator will only yield batches of image data, which
            is useful to use model.predict_generator(),
            model.evaluate_generator(), etc.).
        target_size (tuple): Spatial size images are resized to.
            Default is (224, 224).

    Returns:
        An initialised ImageDataGenerator().flow_from_directory() object
        ready with batches to be passed to training function.
    '''
    # Default arguments are evaluated once at definition time, so the old
    # `gen=image.ImageDataGenerator()` default was a single shared instance
    # across every call; create a fresh one per call instead.
    if gen is None:
        gen = image.ImageDataGenerator()
    # Note all data is resized to target_size (224 x 224 by default).
    return gen.flow_from_directory(path,
                                   target_size=target_size,
                                   class_mode=class_mode,
                                   shuffle=shuffle,
                                   batch_size=batch_size)
def test_batch_standardize(self):
    """standardize() should accept a whole batch, not just single images."""
    if PIL is None:
        return  # PIL is required to build the test images.
    for test_images in _generate_test_images():
        # Stack the PIL images into one (N, H, W, C) batch array.
        arrays = [
            preprocessing_image.img_to_array(im)[None, ...]
            for im in test_images
        ]
        batch = np.vstack(arrays)

        generator = preprocessing_image.ImageDataGenerator(
            featurewise_center=True,
            samplewise_center=True,
            featurewise_std_normalization=True,
            samplewise_std_normalization=True,
            zca_whitening=True,
            rotation_range=90.,
            width_shift_range=0.1,
            height_shift_range=0.1,
            shear_range=0.5,
            zoom_range=0.2,
            channel_shift_range=0.,
            brightness_range=(1, 5),
            fill_mode='nearest',
            cval=0.5,
            horizontal_flip=True,
            vertical_flip=True)
        generator.fit(batch, augment=True)

        # Randomly transform each sample, then standardize the full batch.
        augmented = np.copy(batch)
        for idx, sample in enumerate(augmented):
            augmented[idx] = generator.random_transform(sample)
        augmented = generator.standardize(augmented)
def get_batches(self, path, gen=None, shuffle=True, batch_size=8,
                class_mode='categorical'):
    """
    Takes the path to a directory, and generates batches of
    augmented/normalized data. Yields batches indefinitely, in an
    infinite loop.

    See Keras documentation: https://keras.io/preprocessing/image/

    Args:
        path: Path to the directory to flow images from.
        gen: Optional pre-configured image.ImageDataGenerator; a fresh
            default generator is created when omitted.
        shuffle: Whether to shuffle the data (default True).
        batch_size: Number of images per batch (default 8).
        class_mode: One of "categorical", "binary", "sparse" or None.

    Returns:
        A DirectoryIterator yielding (images, labels) batches.
    """
    # Default arguments are evaluated once at definition time; the old
    # `gen=image.ImageDataGenerator()` default was one shared instance
    # across all calls. Build a fresh generator per call instead.
    if gen is None:
        gen = image.ImageDataGenerator()
    return gen.flow_from_directory(path,
                                   target_size=(224, 224),
                                   class_mode=class_mode,
                                   shuffle=shuffle,
                                   batch_size=batch_size)
def _get_directory_iterator(self, route):
    """Return a categorical directory iterator over `route` with pixels rescaled to [0, 1]."""
    rescaling_generator = image.ImageDataGenerator(rescale=1.0 / 255)
    iterator = rescaling_generator.flow_from_directory(
        directory=route,
        target_size=(self.__width, self.__height),
        batch_size=self.batch_size,
        class_mode="categorical")
    return iterator
def _set_directory_iterator(self, route: str) -> None:
    """Build and store a categorical directory iterator over `route`, rescaling pixels to [0, 1]."""
    rescaler = image_preprocessor.ImageDataGenerator(rescale=1.0 / 255)
    # NOTE(review): `self.bath_size` looks like a typo for `batch_size`,
    # but the attribute may genuinely be named this way elsewhere in the
    # class — confirm before renaming.
    self.directory_iterator = rescaler.flow_from_directory(
        directory=route,
        target_size=(self.width, self.height),
        batch_size=self.bath_size,
        class_mode="categorical")
def get_batches(dirname, target_size, gen=None, shuffle=True, batch_size=4,
                class_mode='categorical'):
    """Wrap ImageDataGenerator.flow_from_directory for a directory of images.

    Args:
        dirname: Directory to flow images from.
        target_size: (height, width) tuple images are resized to.
        gen: Optional pre-configured image.ImageDataGenerator; a fresh
            default generator is created when omitted.
        shuffle: Whether to shuffle the data (default True).
        batch_size: Number of images per batch (default 4).
        class_mode: One of "categorical", "binary", "sparse" or None.

    Returns:
        A DirectoryIterator yielding (images, labels) batches indefinitely.
    """
    # Default arguments are evaluated once at definition time; the old
    # `gen=image.ImageDataGenerator()` default was a single instance shared
    # by every caller. Create a fresh generator per call instead.
    if gen is None:
        gen = image.ImageDataGenerator()
    return gen.flow_from_directory(dirname,
                                   target_size=target_size,
                                   class_mode=class_mode,
                                   shuffle=shuffle,
                                   batch_size=batch_size)
def _load_noncached(self):
    """Set up streaming of the dataset from disk (no in-memory cache)."""
    self.is_cached = False
    self.image_data_generator = image.ImageDataGenerator()
    dataset_dir = os.path.join(self.path, self.name)
    directory_iter = self.image_data_generator.flow_from_directory(
        directory=dataset_dir,
        target_size=(224, 224),
        class_mode='categorical',
        shuffle=True,
        batch_size=self.batch_size)
    self.iter = directory_iter
    # Total number of image files discovered under the dataset directory.
    self.file_count = len(directory_iter.filenames)
def test_image_data_generator(self):
    """flow() works before and after fit() and preserves sample shapes."""
    if PIL is None:
        return  # PIL is required to build the test images.
    for test_images in _generate_test_images():
        # Stack the PIL images into one (N, H, W, C) batch array.
        arrays = [
            preprocessing_image.img_to_array(im)[None, ...]
            for im in test_images
        ]
        batch = np.vstack(arrays)

        generator = preprocessing_image.ImageDataGenerator(
            featurewise_center=True,
            samplewise_center=True,
            featurewise_std_normalization=True,
            samplewise_std_normalization=True,
            zca_whitening=True,
            rotation_range=90.,
            width_shift_range=0.1,
            height_shift_range=0.1,
            shear_range=0.5,
            zoom_range=0.2,
            channel_shift_range=0.,
            brightness_range=(1, 5),
            fill_mode='nearest',
            cval=0.5,
            horizontal_flip=True,
            vertical_flip=True)

        # flow() must work on unseen data even before fit().
        generator.flow(np.random.random((32, 10, 10, 3)))

        generator.fit(batch, augment=True)
        labels = np.arange(batch.shape[0])
        for sample_batch, _ in generator.flow(batch, labels, shuffle=True):
            # Per-sample shape must be unchanged by the pipeline.
            self.assertEqual(sample_batch.shape[1:], batch.shape[1:])
            break
def generate_aug_data(train_size=100, val_size=50):
    """Generate augmented image/mask pairs and save them as PNGs on disk.

    Reads the pre-resized source images from `base_path + "resized"`, builds
    image and per-class mask tensors, then uses seeded ImageDataGenerators to
    write `train_size` augmented training samples and `val_size` augmented
    validation samples into the `aug/` directory tree.

    Args:
        train_size: Number of augmented training samples to emit per stream.
        val_size: Number of augmented validation samples to emit per stream.

    NOTE(review): relies on module-level globals (`base_path`, `n_channels`,
    `CLASS_NAMES`, `seed`, `rs`, `invoke_generator`) — confirm against the
    rest of the file.
    """
    imgs = os.listdir(base_path + "resized")
    # X: (num_images, 512, 512, channels); Y: one binary mask per class.
    X = np.zeros((len(imgs), 512, 512, n_channels))
    Y = np.zeros((len(imgs), 512, 512, 1, len(CLASS_NAMES)))
    for i, filename in (enumerate(os.listdir(base_path + "resized/"))):
        if not filename.endswith('.png'):
            continue
        full_path = base_path + "resized/" + filename
        x_img = cv2.imread(full_path)
        if n_channels == 1:
            # Single-channel mode: grayscale + adaptive threshold binarization.
            x_img = cv2.cvtColor(x_img, cv2.COLOR_BGR2GRAY)
            x_img = cv2.adaptiveThreshold(x_img, 255,
                                          cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                          cv2.THRESH_BINARY, 11, 12)
            X[i, :, :, 0] = np.asarray(x_img, dtype=np.uint8)
        else:
            X[i, :, :, :n_channels] = np.asarray(x_img, dtype=np.uint8)
        # Per-class ground-truth masks for this image.
        Y[i] = rs.create_output_masked_tensor(filename, CLASS_NAMES)
    # 70/30 train/test split (the test halves are currently unused here).
    X_train = X[:int(X.shape[0] * 0.7)]
    X_test = X[int(X.shape[0] * 0.7):]
    Y_train = Y[:int(Y.shape[0] * 0.7)]
    Y_test = Y[int(Y.shape[0] * 0.7):]
    # Shared augmentation parameters so images and masks transform identically
    # (same seed is passed to every fit/flow below).
    data_gen_args = dict(
        shear_range=0.1,
        rotation_range=30,
        zoom_range=0.1,
        width_shift_range=0.1,
        height_shift_range=0.1,
    )
    image_datagen = image.ImageDataGenerator(**data_gen_args)
    # One mask generator per class so each mask channel flows independently.
    mask_datagen = [
        image.ImageDataGenerator(**data_gen_args)
        for i in range(len(CLASS_NAMES))
    ]
    # First 90% of the training split feeds the training streams.
    image_datagen.fit(X_train[:int(X_train.shape[0] * 0.9)],
                      augment=True, seed=seed)
    for i in range(len(CLASS_NAMES)):
        mask_datagen[i].fit(Y_train[:int(Y_train.shape[0] * 0.9), :, :, :, i],
                            augment=True, seed=seed)
    print('starting image generation')
    batch_size_gen = 1
    # Flows are lazy: images are only written when the iterators are consumed
    # by invoke_generator() below (after the output dirs are recreated).
    x = image_datagen.flow(X_train[:int(X_train.shape[0] * 0.9)],
                           batch_size=batch_size_gen,
                           shuffle=True,
                           seed=seed,
                           save_to_dir=base_path + "aug/x/",
                           save_format="png")
    y = [
        mask_datagen[i].flow(Y_train[:int(Y_train.shape[0] * 0.9), :, :, :, i],
                             batch_size=batch_size_gen,
                             shuffle=True,
                             seed=seed,
                             save_to_dir=base_path + f"aug/y/{CLASS_NAMES[i]}",
                             save_format="png")
        for i in range(len(CLASS_NAMES))
    ]
    # Creating the validation Image and Mask generator
    # (remaining 10% of the training split serves as validation data).
    image_datagen_val = image.ImageDataGenerator(**data_gen_args)
    mask_datagen_val = [
        image.ImageDataGenerator(**data_gen_args)
        for i in range(len(CLASS_NAMES))
    ]
    image_datagen_val.fit(X_train[int(X_train.shape[0] * 0.9):],
                          augment=True, seed=seed)
    for i in range(len(CLASS_NAMES)):
        mask_datagen_val[i].fit(Y_train[int(Y_train.shape[0] * 0.9):, :, :, :, i],
                                augment=True, seed=seed)
    x_val = image_datagen_val.flow(X_train[int(X_train.shape[0] * 0.9):],
                                   batch_size=batch_size_gen,
                                   shuffle=True,
                                   seed=seed,
                                   save_to_dir=base_path + "aug/xval/",
                                   save_format="png")
    print(Y_train.shape)
    y_val = [
        mask_datagen_val[i].flow(
            Y_train[int(Y_train.shape[0] * 0.9):, :, :, :, i],
            batch_size=batch_size_gen,
            shuffle=True,
            seed=seed,
            save_to_dir=base_path + f"aug/yval/{CLASS_NAMES[i]}",
            save_format="png")
        for i in range(len(CLASS_NAMES))
    ]
    print('remove old data and create all required dirs')
    # Wipe and recreate output dirs BEFORE consuming the flows, so each run
    # starts from empty directories.
    shutil.rmtree(base_path + f"aug/x/", ignore_errors=True)
    shutil.rmtree(base_path + f"aug/xval/", ignore_errors=True)
    os.mkdir(base_path + f"aug/x")
    os.mkdir(base_path + f"aug/xval")
    for i in range(len(CLASS_NAMES)):
        if os.path.exists(base_path + f"aug/y/{CLASS_NAMES[i]}"):
            shutil.rmtree(base_path + f"aug/y/{CLASS_NAMES[i]}",
                          ignore_errors=True)
        os.mkdir(base_path + f"aug/y/{CLASS_NAMES[i]}")
        if os.path.exists(base_path + f"aug/yval/{CLASS_NAMES[i]}"):
            shutil.rmtree(base_path + f"aug/yval/{CLASS_NAMES[i]}",
                          ignore_errors=True)
        os.mkdir(base_path + f"aug/yval/{CLASS_NAMES[i]}")
    print('finished')
    # Consume each flow to actually write the augmented PNGs to disk.
    print('generation: x')
    invoke_generator(x, train_size)
    print('generation: x_val')
    invoke_generator(x_val, val_size)
    for i in range(len(CLASS_NAMES)):
        print(f'generation: y: {CLASS_NAMES[i]}')
        invoke_generator(y[i], train_size)
        print(f'generation: y_val: {CLASS_NAMES[i]}')
        invoke_generator(y_val[i], val_size)
# Build the train/val/test directory paths for the cats-vs-dogs layout.
train_dir = os.path.join(base_dir, 'train')
val_dir = os.path.join(base_dir, 'val')
test_dir = os.path.join(base_dir, 'test')
train_cats_dir = os.path.join(train_dir, 'cats')
val_cats_dir = os.path.join(val_dir, 'cats')
test_cats_dir = os.path.join(test_dir, 'cats')
train_dogs_dir = os.path.join(train_dir, 'dogs')
val_dogs_dir = os.path.join(val_dir, 'dogs')
test_dogs_dir = os.path.join(test_dir, 'dogs')
# Setting up the data aug config
datagen = image.ImageDataGenerator(rotation_range=40,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True,
                                   fill_mode='nearest')
fnames = [
    os.path.join(train_cats_dir, fn) for fn in os.listdir(train_cats_dir)
]
# Preview augmentation on one sample image (the 4th cat image).
img_path = fnames[3]
img = image.load_img(img_path, target_size=(150, 150))
x = image.img_to_array(img)
# flow() expects a batch dimension: (1, 150, 150, 3).
x = x.reshape((1, ) + x.shape)
i = 0
# NOTE(review): datagen.flow() yields indefinitely and no break is visible
# in this chunk — as written, this loop opens figures forever. Confirm a
# break/limit exists beyond this view.
for batch in datagen.flow(x, batch_size=1):
    plt.figure(i)
def directory_iterator_with_validation_split_test_helper(
        self, validation_split):
    """Check flow_from_directory's training/validation subset splitting.

    Builds a temporary class/subfolder tree of test images, then verifies
    that an ImageDataGenerator configured with `validation_split` rejects an
    unknown subset name and partitions the images into the expected
    training/validation sample counts.

    Args:
        validation_split: Fraction of images reserved for validation.
    """
    if PIL is None:
        return  # Skip test if PIL is not available.
    num_classes = 2
    tmp_folder = tempfile.mkdtemp(prefix='test_images')
    # create folders and subfolders
    paths = []
    for cl in range(num_classes):
        class_directory = 'class-{}'.format(cl)
        classpaths = [
            class_directory,
            os.path.join(class_directory, 'subfolder-1'),
            os.path.join(class_directory, 'subfolder-2'),
            os.path.join(class_directory, 'subfolder-1', 'sub-subfolder')
        ]
        for path in classpaths:
            os.mkdir(os.path.join(tmp_folder, path))
        paths.append(classpaths)
    # save the images in the paths
    count = 0
    filenames = []
    for test_images in _generate_test_images():
        for im in test_images:
            # rotate image class
            im_class = count % num_classes
            # rotate subfolders
            classpaths = paths[im_class]
            filename = os.path.join(classpaths[count % len(classpaths)],
                                    'image-{}.jpg'.format(count))
            filenames.append(filename)
            im.save(os.path.join(tmp_folder, filename))
            count += 1
    # create iterator
    generator = preprocessing_image.ImageDataGenerator(
        validation_split=validation_split)
    # An unknown subset name must be rejected.
    with self.assertRaises(ValueError):
        generator.flow_from_directory(tmp_folder, subset='foo')
    # Expected split sizes (validation count is truncated, not rounded).
    num_validation = int(count * validation_split)
    num_training = count - num_validation
    train_iterator = generator.flow_from_directory(
        tmp_folder, subset='training')
    self.assertEqual(train_iterator.samples, num_training)
    valid_iterator = generator.flow_from_directory(
        tmp_folder, subset='validation')
    self.assertEqual(valid_iterator.samples, num_validation)
    # check number of classes and images
    self.assertEqual(len(train_iterator.class_indices), num_classes)
    self.assertEqual(len(train_iterator.classes), num_training)
    self.assertEqual(
        len(set(train_iterator.filenames) & set(filenames)), num_training)
    shutil.rmtree(tmp_folder)
def test_directory_iterator(self):
    """End-to-end flow_from_directory test: discovery, loading, Sequence use.

    Builds a temporary class/subfolder tree of test images, exercises the
    load_img utility, verifies class/image discovery, and checks that the
    iterator works as a Sequence with a preprocessing_function applied.
    """
    if PIL is None:
        return  # Skip test if PIL is not available.
    num_classes = 2
    temp_dir = self.get_temp_dir()
    self.addCleanup(shutil.rmtree, temp_dir)
    # create folders and subfolders
    paths = []
    for cl in range(num_classes):
        class_directory = 'class-{}'.format(cl)
        classpaths = [
            class_directory,
            os.path.join(class_directory, 'subfolder-1'),
            os.path.join(class_directory, 'subfolder-2'),
            os.path.join(
                class_directory, 'subfolder-1', 'sub-subfolder')
        ]
        for path in classpaths:
            os.mkdir(os.path.join(temp_dir, path))
        paths.append(classpaths)
    # save the images in the paths
    count = 0
    filenames = []
    for test_images in _generate_test_images():
        for im in test_images:
            # rotate image class
            im_class = count % num_classes
            # rotate subfolders
            classpaths = paths[im_class]
            filename = os.path.join(classpaths[count % len(classpaths)],
                                    'image-{}.jpg'.format(count))
            filenames.append(filename)
            im.save(os.path.join(temp_dir, filename))
            count += 1
    # Test image loading util
    fname = os.path.join(temp_dir, filenames[0])
    _ = preprocessing_image.load_img(fname)
    _ = preprocessing_image.load_img(fname, grayscale=True)
    _ = preprocessing_image.load_img(fname, target_size=(10, 10))
    _ = preprocessing_image.load_img(fname,
                                     target_size=(10, 10),
                                     interpolation='bilinear')
    # create iterator
    generator = preprocessing_image.ImageDataGenerator()
    dir_iterator = generator.flow_from_directory(temp_dir)
    # check number of classes and images
    self.assertEqual(len(dir_iterator.class_indices), num_classes)
    self.assertEqual(len(dir_iterator.classes), count)
    self.assertEqual(set(dir_iterator.filenames), set(filenames))

    def preprocessing_function(x):
        """This will fail if not provided by a Numpy array.
        Note: This is made to enforce backward compatibility.

        Args:
            x: A numpy array.

        Returns:
            An array of zeros with the same shape as the given array.
        """
        self.assertEqual(x.shape, (26, 26, 3))
        self.assertIs(type(x), np.ndarray)
        return np.zeros_like(x)

    # Test usage as Sequence
    generator = preprocessing_image.ImageDataGenerator(
        preprocessing_function=preprocessing_function)
    dir_seq = generator.flow_from_directory(
        str(temp_dir),
        target_size=(26, 26),
        color_mode='rgb',
        batch_size=3,
        class_mode='categorical')
    self.assertEqual(len(dir_seq), count // 3 + 1)
    x1, y1 = dir_seq[1]
    self.assertEqual(x1.shape, (3, 26, 26, 3))
    self.assertEqual(y1.shape, (3, num_classes))
    x1, y1 = dir_seq[5]
    # The preprocessing_function zeroes every image, so the batch is all 0.
    self.assertTrue((x1 == 0).all())
T_BATCH_SIZE = 64
V_BATCH_SIZE = 64

# Load a full ImageNet-pretrained ResNet50 and freeze everything except its
# final layer, then stack a 2-way softmax classifier on top.
model = ResNet50(weights="imagenet", input_shape=(224, 224, 3))
for layer in model.layers[:-1]:
    layer.trainable = False
new_model = models.Sequential()
new_model.add(model)
new_model.add(layers.Dense(2, activation='softmax'))
new_model.summary()

# Plain generators (no augmentation) over the train/validation directories.
train_datagen = image.ImageDataGenerator()
val_datagen = image.ImageDataGenerator()
train_generator = train_datagen.flow_from_directory(TRAIN_DIR,
                                                    target_size=(224, 224),
                                                    batch_size=T_BATCH_SIZE)
valid_generator = val_datagen.flow_from_directory(VALID_DIR,
                                                  target_size=(224, 224),
                                                  batch_size=V_BATCH_SIZE)

opt = optimizers.Adagrad()
# BUG FIX: flow_from_directory defaults to class_mode='categorical', which
# yields one-hot label arrays; 'sparse_categorical_crossentropy' expects
# integer labels and would fail on them. Use the matching categorical loss.
new_model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])
def main(args):
    """Split a labeled frame list into train/valid folders and fine-tune DenseNet121.

    Reads `<dataset>_<data_type>_label.csv`, groups frames by label, performs
    a random 80/20 per-label split, copies the files into `data/train/<label>`
    and `data/valid/<label>`, then trains a frozen DenseNet121 with a new
    softmax head and saves the best/final checkpoints.

    Args:
        args: Parsed CLI arguments with `dataset` and `data_type` attributes.
    """
    dataset = args.dataset
    data_type = args.data_type
    # data dir, image size and batch size
    DATA_DIR = 'data'
    TRAIN_DIR = os.path.join(DATA_DIR, 'train')
    VALID_DIR = os.path.join(DATA_DIR, 'valid')
    SIZE = (224, 224)
    BATCH_SIZE = 4
    # remove files
    # NOTE(review): bare `except:` silently swallows ALL exceptions here
    # (including KeyboardInterrupt); best-effort cleanup of a possibly
    # missing dir would be safer as `except FileNotFoundError:` — confirm
    # intent before narrowing.
    try:
        shutil.rmtree(DATA_DIR)
    except:
        pass
    train_ratio = 0.8  # 80% data for training, the rest for testing
    # get the list of filenames and corresponding list of labels for
    # training et validation
    train_filenames = []
    train_labels = []
    val_filenames = []
    val_labels = []
    # read files into label and frame lists
    with open('../data/labels/' + dataset + '_' + data_type +
              '_label.csv') as f:
        frames_labels = [(line.strip().split(',')[0],
                          line.strip().split(',')[1]) for line in f]
    # re-organize data by labels
    file_dir = '../data/' + dataset + '/' + data_type + '/jpg/'
    file_format = '.jpeg'
    dict_frame = {
    }  # key: label value, value: a list of indice in the original file
    for fr_lb in frames_labels:
        fr, lb = fr_lb
        if (lb not in dict_frame):
            dict_frame[lb] = []
        dict_frame[lb].append(file_dir + fr + file_format)
    random.seed()  # using current time as the seed
    # generate filenames and labels for training and validation dataset
    for lb in dict_frame:
        # pick random indices for training data for lb in dict_frame
        train_index = random.sample(range(0, len(dict_frame[lb])),
                                    int(train_ratio * len(dict_frame[lb])))
        for index in range(len(dict_frame[lb])):
            # training data
            if (index in train_index):
                train_filenames.append(dict_frame[lb][index])
                # CSV labels are 1-based; folder/class labels are 0-based.
                train_labels.append(int(lb) - 1)
            # validation data
            else:
                val_filenames.append(dict_frame[lb][index])
                val_labels.append(int(lb) - 1)
    assert set(train_labels) == set(
        val_labels), "Train and val labels don't correspond:\n{}\n{}".format(
            set(train_labels), set(val_labels))
    # create new dir data/train/label_x and data/valid/label_x
    for label in set(train_labels):
        os.makedirs(os.path.join(TRAIN_DIR, str(label)), exist_ok=True)
        os.makedirs(os.path.join(VALID_DIR, str(label)), exist_ok=True)
    # copy files
    for tr_file, label in zip(train_filenames, train_labels):
        shutil.copy2(tr_file, os.path.join(TRAIN_DIR, str(label)))
    for val_file, label in zip(val_filenames, val_labels):
        shutil.copy2(val_file, os.path.join(VALID_DIR, str(label)))
    # train models
    num_train_samples = sum(
        [len(files) for r, d, files in os.walk(TRAIN_DIR)])
    num_valid_samples = sum(
        [len(files) for r, d, files in os.walk(VALID_DIR)])
    num_train_steps = math.floor(num_train_samples / BATCH_SIZE)
    num_valid_steps = math.floor(num_valid_samples / BATCH_SIZE)
    # NOTE(review): flips are applied to the VALIDATION generator but not the
    # training one — this looks inverted (augmentation usually applies to
    # training data only); confirm intent.
    gen = image.ImageDataGenerator()
    val_gen = image.ImageDataGenerator(horizontal_flip=True,
                                       vertical_flip=True)
    batches = gen.flow_from_directory(TRAIN_DIR,
                                      target_size=SIZE,
                                      class_mode='categorical',
                                      shuffle=True,
                                      batch_size=BATCH_SIZE)
    val_batches = val_gen.flow_from_directory(VALID_DIR,
                                              target_size=SIZE,
                                              class_mode='categorical',
                                              shuffle=True,
                                              batch_size=BATCH_SIZE)
    model = DenseNet121()
    classes = list(iter(batches.class_indices))
    # Drop the original classification layer and freeze the backbone.
    model.layers.pop()
    for layer in model.layers:
        layer.trainable = False
    last = model.layers[-1].output
    # New softmax head sized to the discovered classes.
    x = Dense(len(classes), activation="softmax")(last)
    finetuned_model = Model(model.input, x)
    finetuned_model.compile(optimizer=Adam(lr=0.0001),
                            loss='categorical_crossentropy',
                            metrics=['accuracy'])
    # Map class indices back to class names for later inference.
    for c in batches.class_indices:
        classes[batches.class_indices[c]] = c
    finetuned_model.classes = classes
    early_stopping = EarlyStopping(patience=450)
    checkpointer = ModelCheckpoint('./densenet_model/densenet_121_best.h5',
                                   verbose=1,
                                   save_best_only=True)
    finetuned_model.fit_generator(batches,
                                  steps_per_epoch=num_train_steps,
                                  epochs=450,
                                  callbacks=[early_stopping, checkpointer],
                                  validation_data=val_batches,
                                  validation_steps=num_valid_steps)
    finetuned_model.save('./densenet_model/densenet_121_final.h5')
def test_image_data_generator_with_split_value_error(self):
    """A validation_split outside [0, 1) must raise ValueError."""
    bad_split = 5
    with self.assertRaises(ValueError):
        preprocessing_image.ImageDataGenerator(validation_split=bad_split)