def __init__(self, inputPath, parameters):
    """Configure the segmentation augmentor.

    Args:
        inputPath: Root folder of the dataset. ``"images/"`` and
            ``"labels/"`` are appended by plain string concatenation, so
            the value is expected to end with a path separator.
        parameters: Mapping of options. ``"outputPath"``, ``"width"`` and
            ``"height"`` are required and must be truthy;
            ``"labelsExtension"`` is optional and defaults to ``".tiff"``.

    Raises:
        KeyError: If a required key is absent from ``parameters``.
        ValueError: If a required value is present but falsy.
    """
    IAugmentor.__init__(self)
    self.inputPath = inputPath
    self.imagesPath = inputPath + "images/"
    self.labelsPath = inputPath + "labels/"
    # output path represents the folder where the images will be stored
    if parameters["outputPath"]:
        self.outputPath = parameters["outputPath"]
    else:
        raise ValueError(
            "You should provide an output path in the parameters")
    self.generators = []
    if parameters["width"]:
        self.width = parameters["width"]
    else:
        raise ValueError("You should provide a width in the parameters")
    if parameters["height"]:
        # The height used to be stored in ``self.width``, which clobbered the
        # width and left ``self.height`` undefined for the preprocessor below.
        self.height = parameters["height"]
    else:
        raise ValueError("You should provide a height in the parameters")
    # The extension is optional: a missing or empty entry falls back to the
    # default instead of raising KeyError.
    self.labelsExtension = parameters.get("labelsExtension") or ".tiff"
    self.aw = AspectAwarePreprocessor(self.width, self.height)
def applyAugmentation(self, passes=np.inf):
    """Yield batches of augmented images and their augmented label images.

    The paths in ``self.imagePaths`` and ``self.labelPaths`` are walked in
    steps of ``self.batchSize``. Every file is read with ``cv2.imread`` and
    run through an ``AspectAwarePreprocessor(self.width, self.height)``;
    each (image, label) pair is then handed to
    ``readAndGenerateImageSegmentation`` together with ``self.generators``.

    Args:
        passes: Number of full sweeps over the dataset before the generator
            stops. The default (infinity) never stops.

    Yields:
        A tuple ``(images, labels)`` of two lists, one entry per pair of
        the current batch.
    """
    preprocessor = AspectAwarePreprocessor(self.width, self.height)
    completed_passes = 0
    while completed_passes < passes:
        for start in np.arange(0, self.numImages, self.batchSize):
            stop = start + self.batchSize
            batch_image_paths = self.imagePaths[start:stop]
            batch_label_paths = self.labelPaths[start:stop]

            # Load every image of the batch first, then every label image.
            loaded_images = []
            for path in batch_image_paths:
                loaded_images.append(preprocessor.preprocess(cv2.imread(path)))
            loaded_labels = []
            for path in batch_label_paths:
                loaded_labels.append(preprocessor.preprocess(cv2.imread(path)))

            # Augment image and label together so both get the same transform.
            augmented_pairs = []
            for picture, mask in zip(loaded_images, loaded_labels):
                augmented_pairs.append(
                    readAndGenerateImageSegmentation(picture, mask,
                                                     self.generators))

            out_images = [pair[0] for pair in augmented_pairs]
            out_labels = [pair[1] for pair in augmented_pairs]
            yield (out_images, out_labels)
        completed_passes += 1
class HDF5LinearClassificationAugmentor:
    """Apply every registered generator to every image and store the results.

    Each generator is applied independently to the preprocessed input image
    ("linear" mode), so one sample is written per (image, generator) pair.
    Class labels are taken from the name of each image's parent directory.
    """

    # All images must have same width and height
    def __init__(self, inputPath, parameters):
        """Configure the augmentor.

        Args:
            inputPath: Folder that is scanned for images.
            parameters: Mapping with truthy ``"outputPath"``, ``"width"``
                and ``"height"`` entries.

        Raises:
            KeyError: If one of the required keys is absent.
            ValueError: If one of the required values is falsy.
        """
        IAugmentor.__init__(self)
        self.inputPath = inputPath
        # output path represents the h5py file where dataset will be stored
        self.outputPath = self._required_parameter(
            parameters, "outputPath",
            "You should provide an output path in the parameters")
        self.generators = []
        self.width = self._required_parameter(
            parameters, "width",
            "You should provide a width in the parameters")
        # The height used to be written to ``self.width``, leaving
        # ``self.height`` undefined when the preprocessor is built below.
        self.height = self._required_parameter(
            parameters, "height",
            "You should provide a height in the parameters")
        self.aw = AspectAwarePreprocessor(self.width, self.height)

    @staticmethod
    def _required_parameter(parameters, key, message):
        """Return ``parameters[key]``, raising ValueError(message) if falsy."""
        value = parameters[key]
        if not value:
            raise ValueError(message)
        return value

    def addGenerator(self, generator):
        """Register a generator to be applied by ``applyAugmentation``."""
        self.generators.append(generator)

    def readImagesAndAnnotations(self):
        """Collect the paths of all images found under ``self.inputPath``."""
        self.imagePaths = list(paths.list_images(self.inputPath))

    def applyAugmentation(self):
        """Augment every image with every generator and write the dataset.

        The writer is created for ``len(imagePaths) * len(generators)``
        samples of shape ``(self.width, self.height, 3)`` and is closed even
        when processing fails part-way through.
        """
        self.readImagesAndAnnotations()
        le = LabelEncoder()
        # The class name is the directory that directly contains the image.
        labels = [p.split(os.path.sep)[-2] for p in self.imagePaths]
        labels = le.fit_transform(labels)
        writer = HDF5DatasetWriterClassification(
            (len(self.imagePaths) * len(self.generators),
             self.width, self.height, 3),
            self.outputPath)
        try:
            # We need to define this function outside to work in parallel.
            writer.storeClassLabels(le.classes_)
            widgets = ["Processing images: ", progressbar.Percentage(), " ",
                       progressbar.Bar(), " ", progressbar.ETA()]
            pbar = progressbar.ProgressBar(maxval=len(self.imagePaths),
                                           widgets=widgets).start()
            for i, (imagePath, label) in enumerate(
                    zip(self.imagePaths, labels)):
                image = cv2.imread(imagePath)
                image = self.aw.preprocess(image)
                for generator in self.generators:
                    newimage = generator.applyForClassification(image)
                    # Preprocess again after the generator has run, before
                    # the sample is handed to the writer.
                    newimage = self.aw.preprocess(newimage)
                    writer.add([newimage], [label])
                pbar.update(i)
        finally:
            writer.close()
        pbar.finish()
def process_folder(args):
    """Load an image folder and split it into train/test arrays.

    Args:
        args: Mapping whose ``"dataset"`` entry is the folder to scan. The
            class name of an image is the name of its parent directory.

    Returns:
        A tuple ``(trainX, testX, trainY, testY, classNames)`` where the
        label arrays are binarized with a single shared ``LabelBinarizer``
        and ``classNames`` is the list of unique class names as strings.
    """
    # grab the list of images that we'll be describing, then extract
    # the class label names from the image paths
    print("[INFO] loading images...")
    imagePaths = list(paths.list_images(args["dataset"]))
    classNames = [pt.split(os.path.sep)[-2] for pt in imagePaths]
    classNames = [str(x) for x in np.unique(classNames)]

    # initialize the image preprocessors
    aap = AspectAwarePreprocessor(IM_WIDTH, IM_HEIGHT)
    iap = ImageToArrayPreprocessor()

    # load the dataset from disk then scale the raw pixel intensities to
    # the range [0, 1]
    sdl = SimpleDatasetLoader(preprocessors=[aap, iap])
    (data, labels) = sdl.load(imagePaths, verbose=500)
    data = data.astype("float") / 255.0

    # partition the data into training and testing splits using 75% of
    # the data for training and the remaining 25% for testing
    (trainX, testX, trainY, testY) = train_test_split(data, labels,
                                                      test_size=0.25,
                                                      random_state=42)

    # convert the labels from integers to vectors; the binarizer is fitted
    # once on the training labels and reused for the test labels so both
    # splits share the same column layout even if a class is missing from
    # one of them
    binarizer = LabelBinarizer()
    trainY = binarizer.fit_transform(trainY)
    testY = binarizer.transform(testY)
    return trainX, testX, trainY, testY, classNames
def __init__(self, inputPath, parameters):
    """Configure the augmentor from an input folder and a parameter mapping.

    Args:
        inputPath: Folder holding the input images.
        parameters: Mapping with truthy ``"outputPath"``, ``"width"`` and
            ``"height"`` entries.

    Raises:
        KeyError: If one of the required keys is absent.
        ValueError: If one of the required values is falsy.
    """
    IAugmentor.__init__(self)
    self.inputPath = inputPath
    # output path represents the h5py file where dataset will be stored
    if parameters["outputPath"]:
        self.outputPath = parameters["outputPath"]
    else:
        raise ValueError(
            "You should provide an output path in the parameters")
    self.generators = []
    if parameters["width"]:
        self.width = parameters["width"]
    else:
        raise ValueError("You should provide a width in the parameters")
    if parameters["height"]:
        # The height used to be stored in ``self.width``, which clobbered the
        # width and left ``self.height`` undefined for the preprocessor below.
        self.height = parameters["height"]
    else:
        raise ValueError("You should provide a height in the parameters")
    self.aw = AspectAwarePreprocessor(self.width, self.height)
def applyAugmentation(self, passes=np.inf):
    """Yield randomly sampled batches of augmented images with their labels.

    The dataset is walked in slices of ``self.batchSize`` paths. Each slice
    is read with ``cv2.imread``, passed through ``readAndGenerateImage`` with
    ``self.generators`` and then through an
    ``AspectAwarePreprocessor(self.width, self.height)``. A batch is filled
    by drawing ``self.batchSize`` samples with replacement from that slice,
    so a short final slice still yields a full-sized batch.

    Args:
        passes: Number of full sweeps over the dataset before the generator
            stops. The default (infinity) never stops.

    Yields:
        A tuple ``(batch_features, batch_labels)`` of freshly allocated
        arrays with shapes ``(batchSize, width, height, 3)`` and
        ``(batchSize, classes)``.
    """
    epochs = 0
    aap = AspectAwarePreprocessor(self.width, self.height)
    while epochs < passes:
        for i in np.arange(0, self.numImages, self.batchSize):
            imagPaths = self.imagePaths[i:i + self.batchSize]
            labels = self.labels[i:i + self.batchSize]
            images = [cv2.imread(imagePath) for imagePath in imagPaths]
            images = [
                aap.preprocess(readAndGenerateImage(image, self.generators))
                for image in images
            ]
            # Allocate new buffers for every batch. Reusing one pair of
            # arrays would overwrite batches a consumer is still holding
            # (for example batches waiting in a training queue).
            batch_features = np.zeros(
                (self.batchSize, self.width, self.height, 3))
            batch_labels = np.zeros((self.batchSize, self.classes))
            for j in range(self.batchSize):
                index = random.randint(0, len(images) - 1)
                batch_features[j] = images[index]
                batch_labels[j] = labels[index]
            yield (batch_features, batch_labels)
        epochs += 1
def main():
    """Use transfer learning and fine-tuning to train a network on a new dataset.

    Command-line arguments: ``-d/--dataset`` (input folder, required),
    ``-m/--model`` (output model file, required) and ``--plot``. The
    ``--plot`` flag is parsed but not consulted; the fine-tuning plot is
    always written to ``inc_ft_plot.png``.

    The process exits with status 1 when the dataset path does not exist.
    """
    a = argparse.ArgumentParser()
    a.add_argument("-d", "--dataset", required=True,
                   help="path to input dataset")
    a.add_argument("-m", "--model", required=True, help="output model file")
    a.add_argument("--plot", action="store_true")
    args = a.parse_args()

    if not os.path.exists(args.dataset):
        print("directories do not exist")
        sys.exit(1)

    # A single batch size drives the augmenting iterator, the number of steps
    # per epoch and prediction, so the three can no longer drift apart.
    batch_size = 16

    # construct the image generator for data augmentation
    aug = ImageDataGenerator(rotation_range=30, width_shift_range=0.1,
                             height_shift_range=0.1, shear_range=0.2,
                             zoom_range=0.2, horizontal_flip=True,
                             fill_mode="nearest")

    # grab the list of images that we'll be describing, then extract
    # the class label names from the image paths
    print("[INFO] loading images...")
    imagePaths = list(paths.list_images(args.dataset))
    classNames = [pt.split(os.path.sep)[-2] for pt in imagePaths]
    classNames = [str(x) for x in np.unique(classNames)]

    # initialize the image preprocessors
    aap = AspectAwarePreprocessor(IM_WIDTH, IM_HEIGHT)
    iap = ImageToArrayPreprocessor()

    # load the dataset from disk then scale the raw pixel intensities to
    # the range [0, 1]
    sdl = SimpleDatasetLoader(preprocessors=[aap, iap])
    (data, labels) = sdl.load(imagePaths, verbose=500)
    data = data.astype("float") / 255.0

    # partition the data into training and testing splits using 75% of
    # the data for training and the remaining 25% for testing
    (trainX, testX, trainY, testY) = train_test_split(data, labels,
                                                      test_size=0.25,
                                                      random_state=42)

    # convert the labels from integers to vectors; the binarizer is fitted
    # once on the training labels and reused for the test labels so both
    # splits share the same column layout
    binarizer = LabelBinarizer()
    trainY = binarizer.fit_transform(trainY)
    testY = binarizer.transform(testY)

    # One epoch covers the training set once at the chosen batch size (the
    # head-training phase previously divided by 32 while batching by 16).
    steps_per_epoch = max(1, len(trainX) // batch_size)

    # setup model
    # include_top=False excludes final FC layer
    base_model = InceptionV3(weights='imagenet', include_top=False)
    model = add_new_fc_layer(base_model, len(classNames))

    # transfer learning by turning off all conv layers
    setup_to_transfer_learn(model, base_model)

    # train the head of the network for a few epochs (all other
    # layers are frozen) -- this will allow the new FC layers to
    # start to become initialized with actual "learned" values
    # versus pure random
    print("[INFO] training head...")
    model.fit_generator(aug.flow(trainX, trainY, batch_size=batch_size),
                        validation_data=(testX, testY),
                        epochs=TL_EPOCHS,
                        steps_per_epoch=steps_per_epoch,
                        verbose=1)

    # evaluate the network after initialization
    print("[INFO] evaluating after initialization...")
    predictions = model.predict(testX, batch_size=batch_size)
    print(
        classification_report(testY.argmax(axis=1),
                              predictions.argmax(axis=1),
                              target_names=classNames))

    # fine-tuning
    setup_to_finetune(model)

    # for the changes to the model to take effect we need to recompile
    # the model, this time using SGD with a *very* small learning rate
    print("[INFO] re-compiling model...")
    opt = SGD(lr=0.001)
    model.compile(loss="categorical_crossentropy", optimizer=opt,
                  metrics=["accuracy"])

    # train the model again, this time fine-tuning *both* the final set
    # of CONV layers along with our set of FC layers
    print("[INFO] fine-tuning model...")
    history_ft = model.fit_generator(
        aug.flow(trainX, trainY, batch_size=batch_size),
        validation_data=(testX, testY),
        epochs=FT_EPOCHS,
        steps_per_epoch=steps_per_epoch,
        verbose=1)

    # evaluate the network on the fine-tuned model
    print("[INFO] evaluating after fine-tuning...")
    predictions = model.predict(testX, batch_size=batch_size)
    print(
        classification_report(testY.argmax(axis=1),
                              predictions.argmax(axis=1),
                              target_names=classNames))
    plot(history_ft, FT_EPOCHS, "inc_ft_plot.png")

    # save the model to disk
    print("[INFO] serializing model...")
    model.save(args.model)
class HDF5PowerSegmentationAugmentor:
    """Chain the registered generators over image/label pairs into HDF5.

    For every input image the generators are applied one after another, each
    one working on the output of the previous one, and every intermediate
    (image, label) result is written to the dataset.
    """

    # All images must have same width and height
    def __init__(self, inputPath, parameters):
        """Configure the augmentor.

        Args:
            inputPath: Root folder of the dataset. ``"images/"`` and
                ``"labels/"`` are appended by plain string concatenation,
                so the value is expected to end with a path separator.
            parameters: Mapping of options. ``"outputPath"``, ``"width"``
                and ``"height"`` are required and must be truthy;
                ``"labelsExtension"`` is optional and defaults to
                ``".tiff"``.

        Raises:
            KeyError: If a required key is absent.
            ValueError: If a required value is falsy.
        """
        IAugmentor.__init__(self)
        self.inputPath = inputPath
        self.imagesPath = inputPath + "images/"
        self.labelsPath = inputPath + "labels/"
        # output path represents the folder where the images will be stored
        self.outputPath = self._required_parameter(
            parameters, "outputPath",
            "You should provide an output path in the parameters")
        self.generators = []
        self.width = self._required_parameter(
            parameters, "width",
            "You should provide a width in the parameters")
        # The height used to be written to ``self.width``, leaving
        # ``self.height`` undefined when the preprocessor is built below.
        self.height = self._required_parameter(
            parameters, "height",
            "You should provide a height in the parameters")
        # Optional entry: missing or empty falls back to the default instead
        # of raising KeyError.
        self.labelsExtension = parameters.get("labelsExtension") or ".tiff"
        self.aw = AspectAwarePreprocessor(self.width, self.height)

    @staticmethod
    def _required_parameter(parameters, key, message):
        """Return ``parameters[key]``, raising ValueError(message) if falsy."""
        value = parameters[key]
        if not value:
            raise ValueError(message)
        return value

    def addGenerator(self, generator):
        """Register a generator to be applied by ``applyAugmentation``."""
        self.generators.append(generator)

    def readImagesAndAnnotations(self):
        """Collect image and label paths and check that the counts match.

        Raises:
            Exception: If the images folder and the labels folder contain a
                different number of files with a recognised extension.
        """
        valid_extensions = (".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif")
        self.imagePaths = list(
            paths.list_files(self.imagesPath, validExts=valid_extensions))
        self.labelPaths = list(
            paths.list_files(self.labelsPath, validExts=valid_extensions))
        if len(self.imagePaths) != len(self.labelPaths):
            raise Exception(
                "The number of files is different in the folder of images and in the folder of labels"
            )

    def applyAugmentation(self):
        """Augment every image/label pair and write the results.

        The label file of an image is looked up in the sibling ``labels``
        folder under the same base name with ``self.labelsExtension``. The
        writer is closed even when processing fails part-way through.
        """
        self.readImagesAndAnnotations()
        widgets = [
            "Processing images: ",
            progressbar.Percentage(), " ",
            progressbar.Bar(), " ",
            progressbar.ETA()
        ]
        pbar = progressbar.ProgressBar(maxval=len(self.imagePaths),
                                       widgets=widgets).start()
        # NOTE(review): the loop below writes len(self.generators) samples
        # per image, while the allocation uses 2**(len(generators) - 1);
        # the two only agree for one or two generators -- confirm the
        # intended dataset size against the writer.
        writer = HDF5DatasetWriterSegmentation(
            (len(self.imagePaths) * (2**(len(self.generators) - 1)),
             self.width, self.height, 3), self.outputPath)
        try:
            for i, imagePath in enumerate(self.imagePaths):
                image = cv2.imread(imagePath)
                image = self.aw.preprocess(image)
                name = imagePath.split(os.path.sep)[-1]
                # <dataset root>/labels/<base name><labels extension>
                labelPath = '/'.join(
                    imagePath.split(os.path.sep)[:-2]
                ) + "/labels/" + name[0:name.rfind(".")] + self.labelsExtension
                label = cv2.imread(labelPath)
                label = self.aw.preprocess(label)
                images = [image]
                labels = [label]
                for generator in self.generators:
                    newimages = []
                    newlabels = []
                    for (im, la) in zip(images, labels):
                        (newimage, newlabel) = generator.applyForSegmentation(
                            im, la)
                        newimage = self.aw.preprocess(newimage)
                        newlabel = self.aw.preprocess(newlabel)
                        writer.add([newimage], [newlabel])
                        newimages.append(newimage)
                        newlabels.append(newlabel)
                    # The next generator works on this generator's output.
                    images = newimages
                    labels = newlabels
                pbar.update(i)
        finally:
            writer.close()
        pbar.finish()