def test_dataframe_iterator_n(self, tmpdir):
    """Check the `n` attribute of iterators built by `flow_from_dataframe`.

    All test images are written to `tmpdir`, but the dataframes list only
    the files after the first two; both iterators must report that count.
    """
    # Write every test image into tmpdir under a sequentially numbered name.
    saved_names = []
    flat_images = (img for group in self.all_test_images for img in group)
    for index, img in enumerate(flat_images):
        name = "image-{}.png".format(index)
        saved_names.append(name)
        img.save(str(tmpdir / name))

    # The dataframes deliberately leave out the first two saved files.
    skipped = 2
    listed_names = saved_names[skipped:]
    expected_n = len(saved_names) - skipped

    labels = np.random.randint(2, size=len(listed_names))
    unlabeled_df = pd.DataFrame({"filename": listed_names})
    labeled_df = pd.DataFrame({"filename": listed_names, "class": labels})

    datagen = image.ImageDataGenerator()
    directory = str(tmpdir)
    unlabeled_iter = datagen.flow_from_dataframe(unlabeled_df,
                                                 directory,
                                                 has_ext=True,
                                                 class_mode=None)
    labeled_iter = datagen.flow_from_dataframe(labeled_df,
                                               directory,
                                               has_ext=True,
                                               class_mode='binary')

    # Both iterators must count exactly the listed files.
    assert unlabeled_iter.n == expected_n
    assert labeled_iter.n == expected_n
def test_batch_standardize(self):
    """Run `ImageDataGenerator.standardize` on whole batches of images.

    The test has no explicit assertions; it passes when fitting, randomly
    transforming and standardizing each batch completes without raising.
    """
    generator_options = dict(
        featurewise_center=True,
        samplewise_center=True,
        featurewise_std_normalization=True,
        samplewise_std_normalization=True,
        zca_whitening=True,
        rotation_range=90.,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=0.5,
        zoom_range=0.2,
        channel_shift_range=0.,
        brightness_range=(1, 5),
        fill_mode='nearest',
        cval=0.5,
        horizontal_flip=True,
        vertical_flip=True)

    for image_group in self.all_test_images:
        # Give each image a leading axis and stack the group along it.
        batch = np.vstack(
            [image.img_to_array(img)[None, ...] for img in image_group])

        datagen = image.ImageDataGenerator(**generator_options)
        datagen.fit(batch, augment=True)

        # Transform a copy sample by sample, then standardize it in one go.
        augmented = np.copy(batch)
        for idx in range(len(augmented)):
            augmented[idx] = datagen.random_transform(augmented[idx])
        augmented = datagen.standardize(augmented)
def test_directory_iterator_class_mode_input(self, tmpdir):
    """Check `flow_from_directory` with `class_mode='input'`.

    The target half of a batch must have the same shape as the input half
    while being a separate array (modifying one must not affect the other).
    """
    class_dir = tmpdir.join('class-1')
    class_dir.mkdir()

    # Store every test image as a JPEG inside the single class folder.
    flat_images = (img for group in self.all_test_images for img in group)
    for index, img in enumerate(flat_images):
        img.save(str(class_dir / 'image-{}.jpg'.format(index)))

    datagen = image.ImageDataGenerator()
    iterator = datagen.flow_from_directory(str(tmpdir), class_mode='input')
    batch = next(iterator)
    inputs, targets = batch[0], batch[1]

    # Input and output halves of the batch have the same shape.
    assert inputs.shape == targets.shape

    # Writing to the target must leave the input untouched, which proves
    # that the two are not the same numpy array.
    first_input = inputs[0]
    first_target = targets[0]
    first_target[0][0][0] += 1
    assert first_input[0][0][0] != first_target[0][0][0]
def test_directory_iterator_with_validation_split(self, validation_split,
                                                  num_training):
    """Check the training/validation subsets of `flow_from_directory`.

    Args:
        validation_split: Value forwarded to
            `ImageDataGenerator(validation_split=...)`.
        num_training: Number of samples the 'training' subset is expected
            to contain; the 'validation' subset must hold the rest.
    """
    num_classes = 2
    tmp_folder = tempfile.mkdtemp(prefix='test_images')
    # The folder is created with `mkdtemp`, so nothing removes it for us.
    # Clean up in `finally`; otherwise any failing assertion below would
    # leave the directory (and all saved images) behind.
    try:
        # create folders and subfolders
        paths = []
        for cl in range(num_classes):
            class_directory = 'class-{}'.format(cl)
            classpaths = [
                class_directory,
                os.path.join(class_directory, 'subfolder-1'),
                os.path.join(class_directory, 'subfolder-2'),
                os.path.join(class_directory, 'subfolder-1', 'sub-subfolder')
            ]
            for path in classpaths:
                os.mkdir(os.path.join(tmp_folder, path))
            paths.append(classpaths)

        # save the images in the paths
        count = 0
        filenames = []
        for test_images in self.all_test_images:
            for im in test_images:
                # rotate image class
                im_class = count % num_classes
                # rotate subfolders
                classpaths = paths[im_class]
                filename = os.path.join(
                    classpaths[count % len(classpaths)],
                    'image-{}.png'.format(count))
                filenames.append(filename)
                im.save(os.path.join(tmp_folder, filename))
                count += 1

        # create iterator
        generator = image.ImageDataGenerator(
            validation_split=validation_split)

        # an unknown subset name must be rejected
        with pytest.raises(ValueError):
            generator.flow_from_directory(tmp_folder, subset='foo')

        train_iterator = generator.flow_from_directory(tmp_folder,
                                                       subset='training')
        assert train_iterator.samples == num_training

        valid_iterator = generator.flow_from_directory(tmp_folder,
                                                       subset='validation')
        assert valid_iterator.samples == count - num_training

        # check number of classes and images
        assert len(train_iterator.class_indices) == num_classes
        assert len(train_iterator.classes) == num_training
        assert len(set(train_iterator.filenames)
                   & set(filenames)) == num_training
    finally:
        shutil.rmtree(tmp_folder)
def test_image_data_generator_with_validation_split(self):
    """Check how `flow(..., subset=...)` treats the classes of each subset.

    Sorted labels must trigger the "different number of classes" error;
    interleaved labels must yield both classes in either subset; an unknown
    subset name must raise ValueError.
    """
    for image_group in self.all_test_images:
        images = np.vstack(
            [image.img_to_array(img)[None, ...] for img in image_group])
        total = len(images)
        datagen = image.ImageDataGenerator(validation_split=0.5)

        # Sorted labels: all zeros first, then all ones. The two halves of
        # the data therefore contain different classes.
        half = total // 2
        sorted_labels = np.concatenate(
            [np.zeros((half, )), np.ones((half, ))])
        expected_message = ('Training and validation subsets '
                            'have different number of classes after '
                            'the split.*')
        with pytest.raises(ValueError, match=expected_message):
            datagen.flow(images,
                         sorted_labels,
                         shuffle=False,
                         batch_size=10,
                         subset='validation')

        # Interleaved labels: zeros, ones, zeros, ones in quarter blocks.
        quarter = total // 4
        mixed_labels = np.concatenate(
            [np.zeros((quarter, )), np.ones((quarter, ))] * 2)

        for subset_name in ('validation', 'training'):
            sequence = datagen.flow(images,
                                    mixed_labels,
                                    shuffle=False,
                                    batch_size=10,
                                    subset=subset_name)
            _, batch_labels = sequence[0]
            assert 2 == len(np.unique(batch_labels))

        with pytest.raises(ValueError):
            datagen.flow(images,
                         np.arange(images.shape[0]),
                         shuffle=False,
                         batch_size=3,
                         subset='foo')
def test_dataframe_iterator_with_custom_indexed_dataframe(self, tmpdir):
    """Check that the dataframe index does not affect `flow_from_dataframe`.

    Three dataframes with identical content but different indexes (default,
    1-based integers, file names) must yield identical batches when the
    iterators share the same seed.
    """
    num_classes = 2

    # Write every test image into tmpdir.
    saved_names = []
    flat_images = (img for group in self.all_test_images for img in group)
    for position, img in enumerate(flat_images):
        name = "image-{}.png".format(position)
        saved_names.append(name)
        img.save(str(tmpdir / name))
    n_saved = len(saved_names)

    # Same columns, three different indexes.
    labels = np.random.randint(num_classes, size=n_saved)
    default_df = pd.DataFrame({"filename": saved_names, "class": labels})
    shifted_df = pd.DataFrame({"filename": saved_names, "class": labels},
                              index=np.arange(1, n_saved + 1))
    named_df = pd.DataFrame({"filename": saved_names, "class": labels},
                            index=saved_names)

    seed = 1
    datagen = image.ImageDataGenerator()
    directory = str(tmpdir)
    iterators = [
        datagen.flow_from_dataframe(frame, directory, has_ext=True, seed=seed)
        for frame in (default_df, shifted_df, named_df)
    ]

    # Every step must produce the same (images, classes) pair everywhere.
    for _ in range(n_saved):
        batches = [next(iterator) for iterator in iterators]
        (ref_x, ref_y), others = batches[0], batches[1:]
        for other_x, _ in others:
            assert np.array_equal(ref_x, other_x)
        for _, other_y in others:
            assert np.array_equal(ref_y, other_y)
def test_dataframe_iterator_with_sort_and_drop_duplicates(self, tmpdir):
    """Check the `sort` and `drop_duplicates` options of `flow_from_dataframe`."""
    # Write every test image into tmpdir; names are zero-padded to 5 digits.
    saved_names = []
    flat_images = (img for group in self.all_test_images for img in group)
    for position, img in enumerate(flat_images):
        name = "image-{:0>5}.png".format(position)
        saved_names.append(name)
        img.save(str(tmpdir / name))

    # One listing in reverse order, one made of two overlapping prefixes
    # (so it contains duplicates).
    total = len(saved_names)
    cut_a, cut_b = np.random.randint(1, total, size=2)
    reversed_names = saved_names[::-1]
    repeated_names = saved_names[:cut_a] + saved_names[:cut_b]

    reversed_df = pd.DataFrame({"filename": reversed_names})
    repeated_df = pd.DataFrame({"filename": repeated_names})

    datagen = image.ImageDataGenerator()
    directory = str(tmpdir)
    sorted_iter = datagen.flow_from_dataframe(reversed_df,
                                              directory,
                                              class_mode=None,
                                              sort=True,
                                              shuffle=False)
    unsorted_iter = datagen.flow_from_dataframe(reversed_df,
                                                directory,
                                                class_mode=None,
                                                sort=False,
                                                shuffle=False)
    dedup_iter = datagen.flow_from_dataframe(repeated_df,
                                             directory,
                                             class_mode=None,
                                             drop_duplicates=True)
    keep_dup_iter = datagen.flow_from_dataframe(repeated_df,
                                                directory,
                                                class_mode=None,
                                                drop_duplicates=False)

    # Sorting undoes the reversal; without sorting the order is kept.
    assert sorted_iter.filenames == unsorted_iter.filenames[::-1]
    assert sorted_iter.filenames[0] == saved_names[0]
    assert unsorted_iter.filenames[0] == saved_names[-1]

    # Duplicates are counted only when they are not dropped.
    assert dedup_iter.n == len(set(repeated_names))
    assert keep_dup_iter.n == len(repeated_names)
def test_dataframe_iterator_class_mode_input(self, tmpdir):
    """Check `flow_from_dataframe` with `class_mode="input"`.

    For both `y_col=None` and `y_col="class"` the target half of a batch
    must equal the input half element-wise while being a separate array.
    """
    # save the images in the paths
    count = 0
    filenames = []
    for test_images in self.all_test_images:
        for im in test_images:
            filename = str(tmpdir / 'image-{}.png'.format(count))
            im.save(filename)
            filenames.append(filename)
            count += 1

    df = pd.DataFrame({"filename": filenames})
    generator = image.ImageDataGenerator()

    # The same checks run once without a y column and once with
    # y_col="class" (a column the dataframe does not contain).
    for y_col in (None, "class"):
        df_autoencoder_iterator = generator.flow_from_dataframe(
            df,
            str(tmpdir),
            x_col="filename",
            y_col=y_col,
            has_ext=True,
            class_mode="input")

        batch = next(df_autoencoder_iterator)

        # check if input and output have the same shape and they're the same.
        # Compare the arrays themselves: `a.all() == b.all()` only compares
        # two scalar booleans and passes for almost any pair of arrays.
        assert batch[0].shape == batch[1].shape
        assert np.allclose(batch[0], batch[1])

        # check if the input and output images are not the same numpy array
        input_img = batch[0][0]
        output_img = batch[1][0]
        output_img[0][0][0] += 1
        assert input_img[0][0][0] != output_img[0][0][0]
def test_image_data_generator_invalid_data(self):
    """Check that `fit` and `flow` raise ValueError for the arrays given here."""
    options = dict(featurewise_center=True,
                   samplewise_center=True,
                   featurewise_std_normalization=True,
                   samplewise_std_normalization=True,
                   zca_whitening=True,
                   data_format='channels_last')
    datagen = image.ImageDataGenerator(**options)

    # `fit` receives a 3-dimensional array.
    with pytest.raises(ValueError):
        fit_input = np.random.random((3, 10, 10))
        datagen.fit(fit_input)

    # `flow` receives a 1-dimensional index array whose length is taken
    # from a 3-dimensional sample array.
    with pytest.raises(ValueError):
        flow_sample = np.random.random((32, 10, 10))
        datagen.flow(np.arange(flow_sample.shape[0]))
def predict(avg_year, img_filename):
    """Classify one image from the dataset directory and plot the result.

    The image is loaded from `<dataset_dir_path>/<avg_year>/<img_filename>`,
    rescaled to [0, 1] and passed to the model stored at `model_path`. The
    image is then shown with a caption of the form
    "<predicted> <probability>% (<true>)", coloured blue when the
    prediction matches `str(avg_year)` and red otherwise.

    Args:
        avg_year: Name of the class sub-directory holding the image; its
            string form is also used as the true label.
        img_filename: File name of the image inside that sub-directory.
    """
    datagen = image.ImageDataGenerator(rescale=1. / 255)
    testgen = datagen.flow_from_directory(directory=dataset_dir_path,
                                          target_size=(96, 96),
                                          batch_size=batch_size,
                                          class_mode="categorical")

    # class indices dictionary with the mapping: class_name -> class_index
    class_indices = testgen.class_indices
    # Order the names by their class index so that position i in the list
    # is the class the model's i-th output refers to, independent of the
    # dictionary's iteration order.
    class_names = sorted(class_indices, key=class_indices.get)

    # load the test image
    img_path = os.path.join(dataset_dir_path, str(avg_year), img_filename)
    img = image.load_img(img_path, target_size=(96, 96))
    img_array = image.img_to_array(img)
    # add a leading batch axis and apply the same rescaling as the generator
    img_array = img_array.reshape((1, ) + img_array.shape)
    img_array = img_array / 255.

    # load the model and predict the label
    model = models.load_model(model_path)
    pred = model.predict(img_array)
    predicted_label = class_names[np.argmax(pred)]
    predicted_prob = np.max(pred)

    true_label = str(avg_year)
    color = 'blue' if predicted_label == true_label else 'red'

    # plot the image
    plt.imshow(img, cmap=plt.cm.binary)
    plt.xlabel("{} {:2.0f}% ({})".format(predicted_label,
                                         100 * predicted_prob, true_label),
               color=color)
    plt.show()
def generatePredictionsRGB(model):
    """Predict on the test set listed in `TEST` and save the results.

    The CSV at `TEST` is read into a dataframe, its images are streamed as
    256x256 RGB batches, and the model's predictions are stacked with the
    generator's labels and written to "DenseNet_RGB_Predictions" via
    `np.save`.

    Args:
        model: Object providing `predict_generator(generator, steps,
            verbose=...)`.
    """
    labels = ['No Finding', 'Enlarged Cardiomediastinum', 'Cardiomegaly',
              'Lung Opacity', 'Lung Lesion', 'Edema', 'Consolidation',
              'Pneumonia', 'Atelectasis', 'Pneumothorax',
              'Pleural Effusion', 'Pleural Other', 'Fracture',
              'Support Devices']
    # Single source of truth for the batch size; it is needed both by the
    # generator and to compute the number of prediction steps.
    batch_size = 64

    datagen = image.ImageDataGenerator(rescale=1. / 255)
    testdf = pd.read_csv(TEST)
    testgenerator = datagen.flow_from_dataframe(testdf,
                                                directory=None,
                                                color_mode='rgb',
                                                target_size=(256, 256),
                                                x_col='Path',
                                                y_col=labels,
                                                class_mode="other",
                                                shuffle=False,
                                                batch_size=batch_size,
                                                drop_duplicates=False)

    # `steps` must be a whole number of batches. Round up so that a final,
    # partially filled batch is still predicted instead of passing a
    # fractional step count.
    steps = int(np.ceil(testgenerator.n / float(batch_size)))
    predictions = model.predict_generator(testgenerator, steps, verbose=1)

    array = np.array([predictions, testgenerator.labels])
    np.save("DenseNet_RGB_Predictions", array)
def preprocess():
    """Create the training and validation directory iterators.

    Both iterators read from `dataset_dir_path`, rescale pixel values by
    1/255 and share one generator configured with `validation_split=0.2`.

    Returns:
        A `(training_generator, validation_generator)` tuple.
    """
    datagen = image.ImageDataGenerator(rescale=1. / 255,
                                       validation_split=0.2)

    # Options common to both subsets.
    flow_options = dict(directory=dataset_dir_path,
                        target_size=(96, 96),
                        batch_size=batch_size,
                        class_mode="categorical")

    training_generator = datagen.flow_from_directory(subset="training",
                                                     **flow_options)
    validation_generator = datagen.flow_from_directory(subset="validation",
                                                       **flow_options)
    return training_generator, validation_generator
def test_deterministic_transform(self):
    """Check `apply_transform` / `apply_affine_transform` with fixed parameters.

    Vertical and horizontal flips must equal the corresponding reversed
    slices of the input, and a 45-degree rotation of a 3x3x3 array of ones
    with `fill_mode='constant'` must match a precomputed array.
    """
    generator = image.ImageDataGenerator(rotation_range=90,
                                         fill_mode='constant')

    # Flips: compare against plain numpy reversal along the flipped axis.
    x = np.random.random((32, 32, 3))
    assert np.allclose(
        generator.apply_transform(x, {'flip_vertical': True}),
        x[::-1, :, :])
    assert np.allclose(
        generator.apply_transform(x, {'flip_horizontal': True}),
        x[:, ::-1, :])

    # Rotation: expected output for theta=45 on an all-ones input.
    x = np.ones((3, 3, 3))
    x_rotated = np.array([[[0., 0., 0.],
                           [0., 0., 0.],
                           [1., 1., 1.]],
                          [[0., 0., 0.],
                           [1., 1., 1.],
                           [1., 1., 1.]],
                          [[0., 0., 0.],
                           [0., 0., 0.],
                           [1., 1., 1.]]])
    assert np.allclose(generator.apply_transform(x, {'theta': 45}),
                       x_rotated)
    # The module-level function must give the same result as the method.
    assert np.allclose(
        image.apply_affine_transform(x,
                                     theta=45,
                                     channel_axis=2,
                                     fill_mode='constant'),
        x_rotated)
def test_image_data_generator_with_validation_split(self):
    """Check which samples `flow(..., subset=...)` serves for each subset.

    With `validation_split=0.5` and unshuffled index labels, the first
    validation batch must be labelled [0, 1, 2] and the first training
    batch [4, 5, 6]; an unknown subset name must raise ValueError.
    """
    for image_group in self.all_test_images:
        images = np.vstack(
            [image.img_to_array(img)[None, ...] for img in image_group])
        datagen = image.ImageDataGenerator(validation_split=0.5)

        def flow_subset(subset_name):
            # Labels are simply the sample indices, so a batch's labels
            # reveal which samples it was built from.
            return datagen.flow(images,
                                np.arange(images.shape[0]),
                                shuffle=False,
                                batch_size=3,
                                subset=subset_name)

        _, validation_labels = flow_subset('validation')[0]
        assert list(validation_labels) == [0, 1, 2]

        _, training_labels = flow_subset('training')[0]
        assert list(training_labels) == [4, 5, 6]

        with pytest.raises(ValueError):
            flow_subset('foo')
def test_dataframe_iterator_absolute_path(self, tmpdir):
    """Check `flow_from_dataframe` with absolute paths and `directory=None`.

    Covers plain and labelled dataframes, a dataframe with a non-existent
    file, one with a file of unsupported extension, and training/validation
    subsets; all of them must serve the same images in the same order.
    """
    # Write every test image into tmpdir and remember its absolute path.
    abs_paths = []
    index = 0
    for image_group in self.all_test_images:
        for img in image_group:
            last_saved = str(tmpdir / "image-{:0>5}.png".format(index))
            abs_paths.append(last_saved)
            img.save(last_saved)
            index += 1
    n_paths = len(abs_paths)

    # Copy the last image under a forbidden extension.
    forbidden_path = str(tmpdir / 'image-forbid.fbd')
    shutil.copy(last_saved, forbidden_path)

    labels = np.random.randint(2, size=n_paths)
    plain_df = pd.DataFrame({"filename": abs_paths})
    labeled_df = pd.DataFrame({"filename": abs_paths, "class": labels})
    missing_df = pd.DataFrame(
        {"filename": ['image-not-exist.png'] + abs_paths})
    forbidden_df = pd.DataFrame({"filename": abs_paths + [forbidden_path]})

    # Options shared by every iterator under test.
    common = dict(has_ext=True, shuffle=False, batch_size=1)

    datagen = image.ImageDataGenerator()
    plain_iter = datagen.flow_from_dataframe(
        plain_df, None, class_mode=None, **common)
    labeled_iter = datagen.flow_from_dataframe(
        labeled_df, None, class_mode='binary', **common)
    missing_iter = datagen.flow_from_dataframe(
        missing_df, None, class_mode=None, **common)
    forbidden_iter = datagen.flow_from_dataframe(
        forbidden_df, None, class_mode=None, **common)

    validation_split = 0.2
    split_datagen = image.ImageDataGenerator(
        validation_split=validation_split)
    train_iter = split_datagen.flow_from_dataframe(
        plain_df, None, class_mode=None, subset='training', **common)
    val_iter = split_datagen.flow_from_dataframe(
        plain_df, None, class_mode=None, subset='validation', **common)

    # With has_ext=False these dataframes must be rejected.
    with pytest.raises(ValueError):
        datagen.flow_from_dataframe(plain_df,
                                    None,
                                    has_ext=False,
                                    class_mode=None)
    with pytest.raises(ValueError):
        datagen.flow_from_dataframe(labeled_df,
                                    None,
                                    has_ext=False,
                                    class_mode='binary')

    # The missing and forbidden entries must not be counted.
    for iterator in (plain_iter, labeled_iter, missing_iter, forbidden_iter):
        assert iterator.n == n_paths
    assert val_iter.n == int(validation_split * n_paths)
    assert train_iter.n == n_paths - val_iter.n

    # Walk all iterators in lockstep; the split iterators are consumed
    # validation first, then training.
    for step in range(n_paths):
        reference = next(plain_iter)
        from_labeled, _ = next(labeled_iter)
        from_missing = next(missing_iter)
        from_forbidden = next(forbidden_iter)
        split_source = val_iter if step < val_iter.n else train_iter
        from_split = next(split_source)

        for candidate in (from_labeled, from_missing, from_forbidden,
                          from_split):
            assert np.array_equal(reference, candidate)
def test_dataframe_iterator(self, tmpdir):
    """End-to-end checks for `ImageDataGenerator.flow_from_dataframe`.

    Covers file names with and without extension, sparse and multi-column
    ("other") targets, rejected argument combinations, and indexing the
    returned iterator as a Sequence with a preprocessing function.
    """
    num_classes = 2
    directory = str(tmpdir)

    # Write every test image into tmpdir; keep the names both with and
    # without the ".png" extension.
    names = []
    stems = []
    flat_images = (img for group in self.all_test_images for img in group)
    for position, img in enumerate(flat_images):
        stem = "image-{}".format(position)
        name = "image-{}.png".format(position)
        names.append(name)
        stems.append(stem)
        img.save(str(tmpdir / name))
    count = len(names)

    labeled_df = pd.DataFrame({
        "filename": names,
        "class": [random.randint(0, 1) for _ in names]
    })

    datagen = image.ImageDataGenerator()
    default_iter = datagen.flow_from_dataframe(labeled_df,
                                               directory,
                                               has_ext=True)
    sparse_iter = datagen.flow_from_dataframe(labeled_df,
                                              directory,
                                              has_ext=True,
                                              class_mode="sparse")
    if np.isnan(sparse_iter.classes).any():
        raise ValueError('Invalid values.')

    # Same data, but the file names lack their extension.
    stem_df = pd.DataFrame({
        "filename": stems,
        "class": [random.randint(0, 1) for _ in stems]
    })
    stem_iter = datagen.flow_from_dataframe(stem_df,
                                            directory,
                                            has_ext=False)

    # Two target columns served with class_mode="other".
    target_columns = ["col1", "col2"]
    numeric_targets_df = pd.DataFrame({
        "filename": names,
        "col1": [random.randrange(0, 1) for _ in names],
        "col2": [random.randrange(0, 1) for _ in names]
    })
    multi_target_iter = datagen.flow_from_dataframe(numeric_targets_df,
                                                    directory,
                                                    y_col=target_columns,
                                                    has_ext=True,
                                                    class_mode="other")
    # The same layout with string-typed columns must be rejected below.
    string_targets_df = pd.DataFrame(
        {
            "filename": names,
            "col1": [random.randrange(0, 1) for _ in names],
            "col2": [random.randrange(0, 1) for _ in names]
        },
        dtype=str)

    _, multi_targets = next(multi_target_iter)
    with pytest.raises(TypeError):
        multi_target_iter = datagen.flow_from_dataframe(string_targets_df,
                                                        directory,
                                                        y_col=target_columns,
                                                        has_ext=True,
                                                        class_mode="other")

    # check number of classes and images
    assert len(default_iter.class_indices) == num_classes
    assert len(default_iter.classes) == count
    assert set(default_iter.filenames) == set(names)
    assert len(stem_iter.class_indices) == num_classes
    assert len(stem_iter.classes) == count
    assert set(stem_iter.filenames) == set(names)
    assert multi_targets.shape[1] == 2

    # Test invalid use cases
    with pytest.raises(ValueError):
        datagen.flow_from_dataframe(labeled_df,
                                    directory,
                                    color_mode='cmyk',
                                    has_ext=True)
    with pytest.raises(ValueError):
        datagen.flow_from_dataframe(labeled_df,
                                    directory,
                                    class_mode='output',
                                    has_ext=True)
    with pytest.raises(ValueError):
        datagen.flow_from_dataframe(stem_df, directory, has_ext=True)

    def zero_out(sample):
        """Replace a sample by zeros after checking its shape and type.

        The assertions fail unless the generator hands over a plain Numpy
        array of shape (26, 26, 3).
        """
        assert sample.shape == (26, 26, 3)
        assert type(sample) is np.ndarray
        return np.zeros_like(sample)

    # Test usage as Sequence
    datagen = image.ImageDataGenerator(preprocessing_function=zero_out)
    sequence = datagen.flow_from_dataframe(labeled_df,
                                           directory,
                                           target_size=(26, 26),
                                           color_mode='rgb',
                                           batch_size=3,
                                           class_mode='categorical',
                                           has_ext=True)
    assert len(sequence) == np.ceil(count / 3)

    batch_x, batch_y = sequence[1]
    assert batch_x.shape == (3, 26, 26, 3)
    assert batch_y.shape == (3, num_classes)

    # The preprocessing function zeroes every sample.
    batch_x, batch_y = sequence[5]
    assert (batch_x == 0).all()

    # Index 9 must be rejected.
    with pytest.raises(ValueError):
        batch_x, batch_y = sequence[9]
def test_image_data_generator(self, tmpdir):
    """End-to-end checks for `ImageDataGenerator.flow` on in-memory arrays.

    For each group of test images the generator is fitted and then
    exercised as an iterator (with/without labels, sample weights,
    shuffling, extra input arrays), with mismatched inputs, and as a
    Sequence.
    """
    save_dir = str(tmpdir)
    generator_options = dict(
        featurewise_center=True,
        samplewise_center=True,
        featurewise_std_normalization=True,
        samplewise_std_normalization=True,
        zca_whitening=True,
        rotation_range=90.,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=0.5,
        zoom_range=0.2,
        channel_shift_range=0.,
        brightness_range=(1, 5),
        fill_mode='nearest',
        cval=0.5,
        horizontal_flip=True,
        vertical_flip=True)

    for image_group in self.all_test_images:
        images = np.vstack(
            [image.img_to_array(img)[None, ...] for img in image_group])
        n_images = images.shape[0]
        shape_of_two = images[:2].shape
        shape_of_three = images[:3].shape

        datagen = image.ImageDataGenerator(**generator_options)
        datagen.fit(images, augment=True)

        # Plain iteration: only the first batch is inspected.
        for batch_x, batch_y in datagen.flow(images,
                                             np.arange(n_images),
                                             shuffle=False,
                                             save_to_dir=save_dir,
                                             batch_size=3):
            assert batch_x.shape == shape_of_three
            assert list(batch_y) == [0, 1, 2]
            break

        # Test with sample weights
        for batch_x, batch_y, batch_w in datagen.flow(
                images,
                np.arange(n_images),
                shuffle=False,
                sample_weight=np.arange(n_images) + 1,
                save_to_dir=save_dir,
                batch_size=3):
            assert batch_x.shape == shape_of_three
            assert list(batch_y) == [0, 1, 2]
            assert list(batch_w) == [1, 2, 3]
            break

        # Test with `shuffle=True`
        for batch_x, batch_y in datagen.flow(images,
                                             np.arange(n_images),
                                             shuffle=True,
                                             save_to_dir=save_dir,
                                             batch_size=3):
            assert batch_x.shape == shape_of_three
            # Check that the sequence is shuffled.
            assert list(batch_y) != [0, 1, 2]
            break

        # Test without y
        for batch_x in datagen.flow(images,
                                    None,
                                    shuffle=True,
                                    save_to_dir=save_dir,
                                    batch_size=3):
            assert type(batch_x) is np.ndarray
            assert batch_x.shape == shape_of_three
            break

        # Test with a single miscellaneous input data array
        extra_1d = np.random.random(n_images)
        for step, (batch_x, batch_y) in enumerate(
                datagen.flow((images, extra_1d),
                             np.arange(n_images),
                             shuffle=False,
                             batch_size=2)):
            window = slice(step * 2, (step + 1) * 2)
            assert batch_x[0].shape == shape_of_two
            assert (batch_x[1] == extra_1d[window]).all()
            if step == 2:
                break

        # Test with two miscellaneous inputs
        extra_3d = np.random.random((n_images, 3, 3))
        for step, (batch_x, batch_y) in enumerate(
                datagen.flow((images, [extra_1d, extra_3d]),
                             np.arange(n_images),
                             shuffle=False,
                             batch_size=2)):
            window = slice(step * 2, (step + 1) * 2)
            assert batch_x[0].shape == shape_of_two
            assert (batch_x[1] == extra_1d[window]).all()
            assert (batch_x[2] == extra_3d[window]).all()
            if step == 2:
                break

        # Test cases with `y = None`
        only_x = datagen.flow(images, None, batch_size=3).next()
        assert type(only_x) is np.ndarray
        assert only_x.shape == shape_of_three

        only_x = datagen.flow((images, extra_1d),
                              None,
                              batch_size=3,
                              shuffle=False).next()
        assert type(only_x) is list
        assert only_x[0].shape == shape_of_three
        assert (only_x[1] == extra_1d[:3]).all()

        only_x = datagen.flow((images, [extra_1d, extra_3d]),
                              None,
                              batch_size=3,
                              shuffle=False).next()
        assert type(only_x) is list
        assert only_x[0].shape == shape_of_three
        assert (only_x[1] == extra_1d[:3]).all()
        assert (only_x[2] == extra_3d[:3]).all()

        # Test some failure cases:
        extra_too_long = np.random.random((n_images + 1, 3, 3))

        with pytest.raises(ValueError) as e_info:
            datagen.flow((images, extra_too_long),
                         np.arange(n_images),
                         batch_size=3)
        assert 'All of the arrays in' in str(e_info.value)

        with pytest.raises(ValueError) as e_info:
            datagen.flow((images, extra_1d),
                         np.arange(n_images + 1),
                         batch_size=3)
        assert '`x` (images tensor) and `y` (labels) ' in str(e_info.value)

        # Test `flow` behavior as Sequence
        sequence = datagen.flow(images,
                                np.arange(n_images),
                                shuffle=False,
                                save_to_dir=save_dir,
                                batch_size=3)
        assert len(sequence) == n_images // 3 + 1
        first_x, first_y = sequence[0]
        assert first_x.shape == shape_of_three
        assert list(first_y) == [0, 1, 2]

        # Test with `shuffle=True`
        sequence = datagen.flow(images,
                                np.arange(n_images),
                                shuffle=True,
                                save_to_dir=save_dir,
                                batch_size=3,
                                seed=123)
        _, labels_before = sequence[0]
        # Check that the sequence is shuffled.
        assert list(labels_before) != [0, 1, 2]

        # `on_epoch_end` should reshuffle the sequence.
        sequence.on_epoch_end()
        _, labels_after = sequence[0]
        assert list(labels_before) != list(labels_after)
def test_directory_iterator(self, tmpdir):
    """End-to-end checks for `ImageDataGenerator.flow_from_directory`.

    Images are spread over two class folders with nested sub-folders; the
    iterator must find all of them, reject invalid modes, and work as a
    Sequence together with a preprocessing function.
    """
    num_classes = 2
    root_dir = str(tmpdir)

    # create folders and subfolders
    class_tree = []
    for class_id in range(num_classes):
        class_root = 'class-{}'.format(class_id)
        first_sub = os.path.join(class_root, 'subfolder-1')
        folders = [
            class_root,
            first_sub,
            os.path.join(class_root, 'subfolder-2'),
            os.path.join(first_sub, 'sub-subfolder'),
        ]
        for folder in folders:
            tmpdir.join(folder).mkdir()
        class_tree.append(folders)

    # save the images in the paths, rotating through classes and folders
    relative_names = []
    flat_images = (img for group in self.all_test_images for img in group)
    for position, img in enumerate(flat_images):
        folders = class_tree[position % num_classes]
        relative_name = os.path.join(folders[position % len(folders)],
                                     'image-{}.png'.format(position))
        relative_names.append(relative_name)
        img.save(str(tmpdir / relative_name))
    count = len(relative_names)

    # create iterator
    datagen = image.ImageDataGenerator()
    dir_iterator = datagen.flow_from_directory(root_dir)

    # check number of classes and images
    assert len(dir_iterator.class_indices) == num_classes
    assert len(dir_iterator.classes) == count
    assert set(dir_iterator.filenames) == set(relative_names)

    # Test invalid use cases
    for invalid_option in ({'color_mode': 'cmyk'}, {'class_mode': 'output'}):
        with pytest.raises(ValueError):
            datagen.flow_from_directory(root_dir, **invalid_option)

    def zero_out(sample):
        """Replace a sample by zeros after checking its shape and type.

        The assertions fail unless the generator hands over a plain Numpy
        array of shape (26, 26, 3).
        """
        assert sample.shape == (26, 26, 3)
        assert type(sample) is np.ndarray
        return np.zeros_like(sample)

    # Test usage as Sequence
    datagen = image.ImageDataGenerator(preprocessing_function=zero_out)
    sequence = datagen.flow_from_directory(root_dir,
                                           target_size=(26, 26),
                                           color_mode='rgb',
                                           batch_size=3,
                                           class_mode='categorical')
    assert len(sequence) == np.ceil(count / 3)

    batch_x, batch_y = sequence[1]
    assert batch_x.shape == (3, 26, 26, 3)
    assert batch_y.shape == (3, num_classes)

    # The preprocessing function zeroes every sample.
    batch_x, batch_y = sequence[5]
    assert (batch_x == 0).all()

    # Index 9 must be rejected.
    with pytest.raises(ValueError):
        batch_x, batch_y = sequence[9]
from keras_preprocessing import image import pandas as pd import numpy as np from keras.models import Sequential from keras.layers import Conv2D, MaxPooling2D, Activation, Dense, Flatten, Dropout from keras.models import load_model from keras.optimizers import SGD datagen_train = image.ImageDataGenerator(rescale=1. / 255, rotation_range=10, width_shift_range=0.2, height_shift_range=0.2, shear_range=0.2, zoom_range=0.2, fill_mode="nearest", validation_split=0.20) datagen_test = image.ImageDataGenerator(rescale=1. / 255) traindf = pd.read_csv('trainLabels.csv') testdf = pd.read_csv('testLabels.csv') def append_text(fn): return str(fn) + '.BMP' traindf['ID'] = traindf['ID'].apply(append_text) training = datagen_train.flow_from_dataframe(dataframe=traindf, directory="./train/",
def test_image_data_generator_with_split_value_error(self):
    """Constructing a generator with `validation_split=5` must raise ValueError."""
    with pytest.raises(ValueError):
        # The instance is never used; only the constructor's error matters.
        image.ImageDataGenerator(validation_split=5)
def main(args):
    """Train and evaluate the dual-input (right/left image) model.

    The model is built with ``cnn_hybrid_color_single`` and compiled with SGD.
    Training samples are split into a live class (label 0) and an attack
    class (labels 1, 2, 3); the live samples are replicated so both classes
    are roughly balanced, and every positive label is collapsed to 1. Each
    epoch runs one pass over the training batches followed by one pass over
    the test batches. The learning rate is multiplied by 0.1 after epochs
    3, 5 and 7, and the weights are saved once training ends.

    Args:
        args: Parsed options. The attributes read here are ``img_rows``,
            ``img_cols``, ``img_channels``, ``learning_rate``,
            ``tr_img_lab_r``, ``tr_img_lab_l``, ``tst_img_lab_r``,
            ``tst_img_lab_l``, ``batch_size`` and ``max_epochs``.

    Raises:
        ValueError: If there are more live samples than attack samples. The
            original code built this exception but never raised it.
        AssertionError: If the right/left lists returned by ``read_pairs``
            differ in length.
    """
    # set the image parameters
    img_rows = args.img_rows
    img_cols = args.img_cols
    img_dim_color = args.img_channels

    # ------------------------------------------------------------------
    # Model definition
    # ------------------------------------------------------------------
    model_final = cnn_hybrid_color_single(img_rows, img_cols, img_dim_color)
    # summary() prints the table itself; wrapping it in print() only added a
    # stray "None" line.
    model_final.summary()
    plot_model(model_final,
               to_file='./NIN_hybrid_bin_resnet_1x1-class',
               show_shapes=True)

    lr = args.learning_rate
    optimizer = SGD(lr=lr, decay=1e-6, momentum=0.9)
    model_final.compile(optimizer=optimizer,
                        loss='sparse_categorical_crossentropy',
                        metrics=['accuracy'])
    # print the value of the learning rate
    print(K.get_value(optimizer.lr))

    # ------------------------------------------------------------------
    # Training data
    # ------------------------------------------------------------------
    _, training_data_r, training_label_r = read_pairs(args.tr_img_lab_r)
    _, training_data_l, training_label_l = read_pairs(args.tr_img_lab_l)
    assert len(training_data_r) == len(training_data_l)
    assert len(training_label_r) == len(training_label_l)

    # Pair every right-image entry with its left counterpart.
    training_data = [[right, left]
                     for right, left in zip(training_data_r, training_data_l)]

    # Separate live and attack samples so the two classes can be balanced.
    # Samples whose label is not 0, 1, 2 or 3 are left out.
    live_samples, live_labels = [], []
    attack_samples, attack_labels = [], []
    for i, stereo_pair in enumerate(training_data):
        label = training_label_r[i]
        if label == 0:
            live_samples.append(stereo_pair)
            live_labels.append(label)
        elif label in (1, 2, 3):  # protocol_1
            attack_samples.append(stereo_pair)
            attack_labels.append(label)
    live_samples_ub = len(live_samples)
    attack_samples_ub = len(attack_samples)

    print("Live samples are %g ,\t attack samples are %g" %
          (live_samples_ub, attack_samples_ub))

    # Number of times the live set is repeated to match the attack set.
    repeats = 1
    if live_samples_ub < attack_samples_ub:
        # Integer ratio; replaces the removed ``np.int`` alias.
        repeats = attack_samples_ub // live_samples_ub
        print("The difference is :%g " % (repeats))
    elif live_samples_ub > attack_samples_ub:
        raise ValueError("The fake samples are less than then live samples")

    # balanced data
    training_data_balanced = live_samples * repeats + attack_samples
    training_label_balanced = live_labels * repeats + attack_labels
    print("Balanced data samples: %g" % len(training_data_balanced))

    # Visit the balanced samples in random order and binarize the labels.
    indices_tr = np.arange(len(training_data_balanced))
    np.random.shuffle(indices_tr)

    train_img_data_r = []
    train_img_data_l = []
    for images_read, i in enumerate(indices_tr, start=1):
        label = training_label_balanced[i]
        if label > 0:
            label = 1
        right, left = training_data_balanced[i]
        train_img_data_r.append([right, label])
        train_img_data_l.append([left, label])
        sys.stdout.write('train images read = {0}\r'.format(images_read))
        sys.stdout.flush()

    # ------------------------------------------------------------------
    # Test data
    # ------------------------------------------------------------------
    _, test_data_r, test_labels_r = read_pairs(args.tst_img_lab_r)
    _, test_data_l, _ = read_pairs(args.tst_img_lab_l)
    assert len(test_data_r) == len(test_data_l)

    test_img_data_r = []
    test_img_data_l = []
    for images_read, (right, left) in enumerate(
            zip(test_data_r, test_data_l), start=1):
        # Labels are taken from the right-image list only.
        label = test_labels_r[images_read - 1]
        if label > 0:
            label = 1
        test_img_data_r.append([right, label])
        test_img_data_l.append([left, label])
        sys.stdout.write('test images read = {0}\r'.format(images_read))
        sys.stdout.flush()

    # ------------------------------------------------------------------
    # Data frames and generators
    # ------------------------------------------------------------------
    def to_frame(rows, id_col):
        # Two-column frame: image identifier and (binary) label.
        frame = pd.DataFrame(rows)
        frame.columns = [id_col, 'label']
        return frame

    def make_flow(generator, frame, id_col, shuffle, **extra):
        # Settings shared by all four iterators.
        return generator.flow_from_dataframe(
            dataframe=frame,
            directory=None,
            x_col=id_col,
            y_col='label',
            has_ext=True,
            batch_size=args.batch_size,
            seed=42,
            shuffle=shuffle,
            class_mode="sparse",
            target_size=(args.img_rows, args.img_cols),
            color_mode='grayscale',
            interpolation='nearest',
            **extra)

    train_df_r = to_frame(train_img_data_r, 'id_r')
    train_df_l = to_frame(train_img_data_l, 'id_l')
    test_df_r = to_frame(test_img_data_r, 'id_r')
    test_df_l = to_frame(test_img_data_l, 'id_l')

    # NOTE(review): both training iterators shuffle with the same seed;
    # presumably this keeps right/left batches aligned -- confirm.
    datagen = image.ImageDataGenerator()
    train_generator_r = make_flow(datagen, train_df_r, 'id_r', True,
                                  drop_duplicates=False)
    train_generator_l = make_flow(datagen, train_df_l, 'id_l', True,
                                  drop_duplicates=False)

    # As in the original, the test iterators do not pass drop_duplicates.
    test_datagen = image.ImageDataGenerator()
    test_generator_r = make_flow(test_datagen, test_df_r, 'id_r', False)
    test_generator_l = make_flow(test_datagen, test_df_l, 'id_l', False)

    # ------------------------------------------------------------------
    # Training / evaluation loop
    # ------------------------------------------------------------------
    batch_num = 0  # counts completed epochs
    while batch_num < args.max_epochs:
        start_time = time.time()  # initialize the clock

        acc = []
        loss = []
        total_batch = train_generator_r.n // train_generator_r.batch_size
        for sub_count in range(1, total_batch + 1):
            x1, y1 = next(train_generator_r)
            x2, _ = next(train_generator_l)

            x1 = x1.astype('float32') / 255
            x2 = x2.astype('float32') / 255

            # Labels come from the right-image iterator.
            tr_acc1 = model_final.fit([x1, x2], y1, epochs=1, verbose=0)
            acc.append(tr_acc1.history['acc'][0])
            loss.append(tr_acc1.history['loss'][0])

            sys.stdout.write('batch_count = {0} of {1} \r'.format(
                sub_count, total_batch))
            sys.stdout.flush()

        # Both figures keep the original x100 scaling.
        train_acc = np.sum(np.asarray(acc)) * 100 / total_batch
        train_loss = np.sum(np.asarray(loss)) * 100 / total_batch
        print('training_acc: {0} \t training_loss: {1}'.format(
            train_acc, train_loss))
        print(
            '______________________________________________________________________'
        )
        print('Running the evaluations')

        test_acc = []
        test_loss = []
        total_test_batch = test_generator_r.n // test_generator_r.batch_size
        for sub_count in range(1, total_test_batch + 1):
            x1, y1 = next(test_generator_r)
            x2, _ = next(test_generator_l)

            x1 = x1.astype('float32') / 255
            x2 = x2.astype('float32') / 255

            tst_loss, tst_acc1 = model_final.evaluate([x1, x2], y1,
                                                      verbose=0)
            test_acc.append(tst_acc1)
            test_loss.append(tst_loss)

            sys.stdout.write('epoch_count = {0}\r'.format(sub_count))
            sys.stdout.flush()

        test_acc = np.sum(np.asarray(test_acc)) * 100 / total_test_batch
        test_loss = np.sum(np.asarray(test_loss)) * 100 / total_test_batch
        print('test_acc: {0} \t test_loss: {1}'.format(test_acc, test_loss))

        batch_num += 1

        # Learning-rate schedule: scale by 0.1 after epochs 3, 5 and 7.
        if batch_num in (3, 5, 7):
            lr = 0.1 * lr
            K.set_value(optimizer.lr, lr)
            print(K.get_value(optimizer.lr))

        end_time = time.time() - start_time
        print("Total time taken %f :" % end_time)

    model_final.save_weights(
        '/home/yaurehman2/Documents/stereo_face_liveness/stereo_ckpt/Conventional/'
        + 'dual_grayscale_input_revised_protocol_1_' + str(args.max_epochs) +
        '.h5')
def trainGrayscale(model):
    """Train ``model`` on grayscale 256x256 images listed in CSV files.

    The train, validation and test CSVs (module constants ``TRAIN``, ``VAL``
    and ``TEST``) are read into data frames; image paths come from the
    ``Path`` column and the targets from the 14 label columns below. The
    model is fitted with checkpointing, a cyclic learning rate, early
    stopping and a ROC callback, then saved to ``DenseNet.h5``.

    Args:
        model: The model to train; it must provide ``fit_generator``,
            ``save_weights`` and ``save``.
    """
    labels = ['No Finding', 'Enlarged Cardiomediastinum', 'Cardiomegaly',
              'Lung Opacity', 'Lung Lesion', 'Edema', 'Consolidation',
              'Pneumonia', 'Atelectasis', 'Pneumothorax', 'Pleural Effusion',
              'Pleural Other', 'Fracture', 'Support Devices']

    datagen = image.ImageDataGenerator(rescale=1. / 255)

    def make_flow(frame, shuffle):
        # Settings shared by the three iterators; only shuffling differs.
        return datagen.flow_from_dataframe(frame,
                                           directory=None,
                                           color_mode='grayscale',
                                           target_size=(256, 256),
                                           x_col='Path',
                                           y_col=labels,
                                           class_mode="other",
                                           shuffle=shuffle,
                                           batch_size=BATCH,
                                           drop_duplicates=False)

    traindf = pd.read_csv(TRAIN)
    validatedf = pd.read_csv(VAL)
    testdf = pd.read_csv(TEST)

    traingenerator = make_flow(traindf, shuffle=True)
    validategenerator = make_flow(validatedf, shuffle=False)
    testgenerator = make_flow(testdf, shuffle=False)

    print(traingenerator.n)
    print(validategenerator.n)
    print(testgenerator.n)

    filepath = "DenseNet-grayscale-{epoch:02d}-{val_loss:.2f}.hdf5"
    checkpoint = ModelCheckpoint(filepath,
                                 monitor='val_loss',
                                 verbose=1,
                                 save_best_only=False,
                                 mode='min')
    clr = CyclicLR(base_lr=0.0001, max_lr=0.0006, step_size=2000.)
    # mode must be the string 'min'; the original passed the builtin ``min``
    # function, which is not a valid mode value.
    es = EarlyStopping(monitor="val_loss", mode='min', verbose=1)
    callbacks_list = [
        checkpoint, clr, es,
        roc_callback(testgenerator, np.array(testgenerator.labels))
    ]

    model.fit_generator(generator=traingenerator,
                        validation_data=validategenerator,
                        epochs=EPOCHS,
                        steps_per_epoch=traingenerator.n / BATCH,
                        validation_steps=validategenerator.n / BATCH,
                        callbacks=callbacks_list,
                        workers=THREAD,
                        verbose=1)

    # NOTE: both calls write to the same path, so the full-model file written
    # by save() replaces the weights-only file.
    model.save_weights("DenseNet.h5")
    model.save('DenseNet.h5')
# and a logistic layer -- let's say we have 200 classes
model.add(Dense(num_classes, activation='softmax'))

# first: train only the top layers (which were randomly initialized)
# i.e. freeze all convolutional InceptionV3 layers
for layer in base_model.layers:
    layer.trainable = False

# compile the model (should be done *after* setting layers to non-trainable)
# initiate RMSprop optimizer
opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)
# Pass the configured optimizer object; the string 'rmsprop' would build a
# default optimizer and discard the lr/decay chosen above.
model.compile(optimizer=opt,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

datagen = processing.ImageDataGenerator(rescale=1. / 255,
                                        shear_range=0.2,
                                        zoom_range=0.2,
                                        horizontal_flip=True)

train_generator = datagen.flow_from_directory(file_path,
                                              target_size=(img_height,
                                                           img_width),
                                              batch_size=batch_size,
                                              class_mode='categorical')

# Balance the classes: weight each class index inversely to its frequency.
y_train = train_generator.classes
present_classes = np.unique(y_train)
balanced_weights = class_weight.compute_class_weight(class_weight='balanced',
                                                     classes=present_classes,
                                                     y=y_train)
# Keras documents class_weight as a dict mapping class index -> weight.
class_weights = {
    int(cls): float(weight)
    for cls, weight in zip(present_classes, balanced_weights)
}

model.fit_generator(train_generator, class_weight=class_weights)
fromarray = Image.fromarray KPImage.pil_image = NumpyPIL # Prepare datasets print('Preparing Data..') img_gen_params = dict( vertical_flip=True, height_shift_range=0.05, width_shift_range=0.02, rotation_range=3.0, zoom_range=0.05, #preprocessing_function=preprocess_input ) img_gen = KPImage.ImageDataGenerator(**img_gen_params) def flow_from_dataframe(img_data_gen, in_df, path_col, y_col, seed=None, **dflow_args): base_dir = os.path.dirname(in_df[path_col].values[0]) df_gen = img_data_gen.flow_from_directory(base_dir, class_mode='sparse', seed=seed, **dflow_args) df_gen.filenames = in_df[path_col].values df_gen.classes = np.stack(in_df[y_col].values, 0)
img_width = 56 # generate data datagen = processing.ImageDataGenerator( rescale=1. / 255, shear_range=0.2, featurewise_center=False, # set input mean to 0 over the dataset samplewise_center=False, # set each sample mean to 0 featurewise_std_normalization=False, # divide inputs by std of the dataset samplewise_std_normalization=False, # divide each input by its std zca_whitening=False, # apply ZCA whitening zca_epsilon=1e-06, # epsilon for ZCA whitening rotation_range=0, # randomly rotate images in the range (degrees, 0 to 180) # randomly shift images horizontally (fraction of total width) width_shift_range=0.1, # randomly shift images vertically (fraction of total height) height_shift_range=0.1, zoom_range=0., # set range for random zoom channel_shift_range=0., # set range for random channel shifts # set mode for filling points outside the input boundaries fill_mode='nearest', cval=0., # value used for fill_mode = "constant" horizontal_flip=True, # randomly flip images vertical_flip=False, # randomly flip images # set function that will be applied on each input preprocessing_function=None, # image data format, either "channels_first" or "channels_last" data_format=None, # fraction of images reserved for validation (strictly between 0 and 1) validation_split=0.0) train_generator = datagen.flow_from_directory(file_path, target_size=(img_height,
def test_dataframe_iterator_with_validation_split(self, validation_split,
                                                  num_training, tmpdir):
    """Check training/validation subsets of ``flow_from_dataframe``.

    All images from ``self.all_test_images`` are saved into ``tmpdir`` and
    described by two data frames (file names with and without extension),
    each with a random binary class column. The training subset must hold
    ``num_training`` samples and the validation subset the remainder.

    Args:
        validation_split: Value passed to ``ImageDataGenerator``.
        num_training: Expected number of samples in the training subset.
        tmpdir: Temporary directory the images are written to.
    """
    num_classes = 2
    directory = str(tmpdir)

    # save the images in the tmpdir
    images = [im for test_images in self.all_test_images
              for im in test_images]
    count = len(images)
    filenames_without = ["image-{}".format(idx) for idx in range(count)]
    filenames = [stem + ".png" for stem in filenames_without]
    for im, filename in zip(images, filenames):
        im.save(str(tmpdir / filename))

    df = pd.DataFrame({
        "filename": filenames,
        "class": [random.randint(0, 1) for _ in filenames]
    })
    df_without_ext = pd.DataFrame({
        "filename": filenames_without,
        "class": [random.randint(0, 1) for _ in filenames_without]
    })

    # create iterator
    generator = image.ImageDataGenerator(validation_split=validation_split)
    df_sparse_iterator = generator.flow_from_dataframe(df, directory,
                                                       has_ext=True,
                                                       class_mode="sparse")
    # The second element of a batch holds the labels; none may be NaN.
    if np.isnan(next(df_sparse_iterator)[1]).any():
        raise ValueError('Invalid values.')

    # Pass the directory as a string, like every other call in this test.
    with pytest.raises(ValueError):
        generator.flow_from_dataframe(df, directory, has_ext=True,
                                      subset='foo')

    train_iterator = generator.flow_from_dataframe(df, directory,
                                                   has_ext=True,
                                                   subset='training')
    assert train_iterator.samples == num_training

    valid_iterator = generator.flow_from_dataframe(df, directory,
                                                   has_ext=True,
                                                   subset='validation')
    assert valid_iterator.samples == count - num_training

    train_iterator_without = generator.flow_from_dataframe(
        df_without_ext, directory, has_ext=False, subset='training')
    assert train_iterator_without.samples == num_training

    valid_iterator_without = generator.flow_from_dataframe(
        df_without_ext, directory, has_ext=False, subset='validation')
    assert valid_iterator_without.samples == count - num_training

    # check number of classes and images
    assert len(train_iterator.class_indices) == num_classes
    assert len(train_iterator.classes) == num_training
    assert len(set(train_iterator.filenames) &
               set(filenames)) == num_training
    intersection = set(train_iterator_without.filenames) & set(filenames)
    assert len(intersection) == num_training
base_model = InceptionResNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3)) base_model.summary() model.add(base_model) model.add(GlobalAveragePooling2D()) # # # let's add a fully-connected layer model.add(Dense(num_classes, activation='softmax')) model.summary() return model if __name__ == '__main__': processing.ImageDataGenerator(rescale=1. / 255) img_height, img_width = 224, 224 input_shape = (img_height, img_width, 3) num_classes = 103 test_path = 'E:\\Data\\image_debug\\' model_weight_path = 'E:\\PythonSource\\inception_landmark\\keras_landmark_inception_resnet_v2_model.18-0.92.h5 ' batch_size = 32 # init class map index class_str = [] for i in range(num_classes): class_str.append(str(i)) class_str = sorted(class_str) # init
def test_valid_args(self):
    """Expect ``ImageDataGenerator(brightness_range=0.1)`` to raise ``ValueError``."""
    scalar_brightness = 0.1
    with pytest.raises(ValueError):
        image.ImageDataGenerator(brightness_range=scalar_brightness)
def top2_acc(y_true, y_pred):
    """Return the top-2 categorical accuracy of ``y_pred`` against ``y_true``.

    The original returned a ``functools.partial`` object without calling it,
    so the arguments were ignored and no metric value was produced.
    """
    return top_k_categorical_accuracy(y_true, y_pred, k=2)


model = load_model(
    '/media/raghu/6A3A-B7CD/glare_resnet_models/models-0.7209-0.7751.hdf5')

validation_dir = inputpath + '/val'

validation_datagen = image.ImageDataGenerator(rescale=1. / 255)
val_batchsize = 10

# Create a generator for prediction; shuffle is off so that filenames and
# classes below line up with the prediction order.
validation_generator = validation_datagen.flow_from_directory(
    validation_dir,
    target_size=(image_size, image_size),
    batch_size=val_batchsize,
    class_mode='categorical',
    shuffle=False)

# Get the filenames from the generator
fnames = validation_generator.filenames

# Get the ground truth from generator
ground_truth = validation_generator.classes
import tensorflow as tf from tensorflow import keras from keras_preprocessing import image """ #%% import glob from PIL import Image directory='Images/' name_list = glob.glob(directory + '*/*') print(name_list) """ #%% #preprocessing directory = 'Images/' data_gen = image.ImageDataGenerator(rescale=1. / 255, validation_split=0.1) train_gen = data_gen.flow_from_directory(directory, batch_size=20, target_size=(150, 150), class_mode='sparse', subset='training') val_gen = data_gen.flow_from_directory(directory, batch_size=20, target_size=(150, 150), class_mode='sparse', subset='validation') """ for i in range(1): print(train_gen[16506]) train_gen.next() #print(val_gen.class_indices)