import numpy as np
from keras.preprocessing.image import ImageDataGenerator
# SSDDataAugmentation, Normalize, DataGenerator and PetLabelEncoder are
# assumed to come from the project's SSD utility modules (ssd_keras-style).


def data_augment(x_train, y_train, batch_size, val_split=0.25,
                 task='classification', num_classes=None):
    h, w = x_train.shape[1:3]

    # Shuffle the training data and split off a validation set.
    seed = 1
    m = len(x_train)
    np.random.seed(seed)
    index = np.arange(0, m)
    np.random.shuffle(index)  # NumPy's shuffle, so the seed above takes effect
    x_train = x_train[index]
    y_train = y_train[index]
    split = int(m * (1 - val_split))
    x_train, x_val = x_train[:split], x_train[split:]
    y_train, y_val = y_train[:split], y_train[split:]

    # Use shifts, flips and rotations to augment the data.
    if task == 'classification':
        # The validation split was already done manually above, so no
        # validation_split argument is needed here.
        datagen = ImageDataGenerator(rotation_range=10,
                                     width_shift_range=0.1,
                                     height_shift_range=0.1,
                                     horizontal_flip=True)
        train_flow = datagen.flow(x_train, y_train, batch_size=batch_size)
    elif task == 'detection_single':
        ssd_data_augmentation = SSDDataAugmentation(img_height=h, img_width=w)
        normalize = Normalize(h, w)
        gen = DataGenerator()
        # Recover absolute pixel box coordinates and integer class ids from
        # the concatenated [one-hot classes, normalized box] targets.
        box = y_train[:, -4:] * [w, h, w, h]
        class_id = np.argmax(y_train[:, :-4, np.newaxis], axis=1)
        labels = np.hstack([class_id, box])
        labels = labels[:, np.newaxis, :].tolist()
        gen.images = x_train * 255  # SSD augmentation expects 0-255 pixel values
        gen.labels = labels
        gen.dataset_size = len(labels)
        gen.dataset_indices = np.arange(gen.dataset_size)
        gen.filenames = ['x' for i in range(gen.dataset_size)]
        train_flow = gen.generate(
            batch_size=batch_size,
            shuffle=True,
            transformations=[ssd_data_augmentation, normalize],
            label_encoder=PetLabelEncoder(num_classes),
            returns={'processed_images', 'encoded_labels'},
            keep_images_without_gt=False)

    # The validation generator performs no augmentation.
    datagen = ImageDataGenerator()
    val_flow = datagen.flow(x_val, y_val, batch_size=batch_size)
    return (train_flow, val_flow)
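# Example usage: a minimal sketch with hypothetical toy data exercising the
# classification branch (only NumPy and Keras are needed for this path).
x = np.random.rand(100, 32, 32, 3).astype('float32')  # 100 random RGB images
y = np.eye(10)[np.random.randint(0, 10, size=100)]    # one-hot labels, 10 classes
train_flow, val_flow = data_augment(x, y, batch_size=16,
                                    val_split=0.25, task='classification')
x_batch, y_batch = next(train_flow)   # one augmented mini-batch
print(x_batch.shape, y_batch.shape)   # (16, 32, 32, 3) (16, 10)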
# If you don't want to create HDF5 datasets, comment out the subsequent two function calls.
# train_dataset.create_hdf5_dataset(file_path='data/image_data/dataset_pascal_voc_07+12_trainval.h5',
#                                   resize=False,
#                                   variable_image_size=True,
#                                   verbose=True)
# val_dataset.create_hdf5_dataset(file_path='data/image_data/dataset_pascal_voc_07_test.h5',
#                                 resize=False,
#                                 variable_image_size=True,
#                                 verbose=True)

# Attach the pre-built HDF5 datasets to the generators directly.
train_dataset.hdf5_dataset = h5py.File(
    'data/image_data/dataset_pascal_voc_07+12_trainval.h5', 'r')
train_dataset.hdf5_dataset_path = 'data/image_data/dataset_pascal_voc_07+12_trainval.h5'
train_dataset.dataset_size = len(train_dataset.hdf5_dataset['images'])
train_dataset.dataset_indices = np.arange(train_dataset.dataset_size, dtype=np.int32)

val_dataset.hdf5_dataset = h5py.File(
    'data/image_data/dataset_pascal_voc_07_test.h5', 'r')
val_dataset.hdf5_dataset_path = 'data/image_data/dataset_pascal_voc_07_test.h5'
val_dataset.dataset_size = len(val_dataset.hdf5_dataset['images'])
val_dataset.dataset_indices = np.arange(val_dataset.dataset_size, dtype=np.int32)

# 3: Set the batch size.
batch_size = 8  # Change the batch size if you like, or if you run into GPU memory issues.

# 4: Set the image transformations for pre-processing and data augmentation options.

# For the training generator:
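# A minimal sketch of what this step typically looks like with the ssd_keras
# utilities (an assumption based on the SSDDataAugmentation call used in
# data_augment above; img_height/img_width must match the model's input size).
from data_generator.data_augmentation_chain_original_ssd import SSDDataAugmentation

img_height, img_width = 300, 300  # assumed SSD300 input size
ssd_data_augmentation = SSDDataAugmentation(img_height=img_height,
                                            img_width=img_width)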