def __init__(self, optimizer, do_augmentation=False):
    """ Sets up the helpers, configuration values and seeded random state.

    Args:
        optimizer: Name used to build the seed configuration key
            ("seed_" + optimizer, optionally + "_augmentation").
        do_augmentation: When equal to False the plain seed/dim
            configuration is read; otherwise an Augmentation helper is
            created and the "_augmentation" seed/dim values are read.
    """

    self.Helpers = Helpers("Data", False)
    self.optimizer = optimizer
    self.do_augmentation = do_augmentation

    # Explicit comparison (rather than truthiness) so that only values
    # equal to False select the non-augmented configuration.
    augmenting = not (self.do_augmentation == False)

    if augmenting:
        self.Augmentation = Augmentation("cnn", self.optimizer)
        data_confs = self.Helpers.confs["cnn"]["data"]
        self.seed = data_confs["seed_" + self.optimizer + "_augmentation"]
        self.dim = data_confs["dim_augmentation"]
    else:
        data_confs = self.Helpers.confs["cnn"]["data"]
        self.seed = data_confs["seed_" + self.optimizer]
        self.dim = data_confs["dim"]

    # Seed both the module-level seed() function and Python's random module.
    seed(self.seed)
    random.seed(self.seed)

    self.data, self.labels = [], []

    self.Helpers.logger.info("Data class initialization complete.")
def process_data(self, df):
    """ Processes the data.

    For every row of df whose image is not listed as a test image, stores
    nine samples (the resized image scaled to [0, 1] plus eight augmented
    variants) in self.data together with a one hot encoded label in
    self.labels, then shuffles, converts and splits the result.

    Args:
        df: Table supporting len() and .iloc[i] row access with 'img'
            (image path) and 'lbl' (0 or 1) fields - presumably a pandas
            DataFrame; confirm against the caller.
    """

    samples_per_image = 9

    count = 0
    neg_count = 0
    pos_count = 0

    dl = len(df)
    self.data = np.zeros(
        (dl * samples_per_image, self.dim, self.dim, 3), dtype=np.float32)
    self.labels = np.zeros((dl * samples_per_image, 2), dtype=np.float32)

    aug = Augmentation()

    # Loop invariant configuration lookups.
    test_negative = self.Helpers.confs["data"]["test_0"]
    test_positive = self.Helpers.confs["data"]["test_1"]

    for d in range(dl):
        row = df.iloc[d]
        img = row['img']

        if img in test_negative or img in test_positive:
            self.Helpers.logger.info("Skipping Test Image: " + img)
            continue

        self.Helpers.logger.info("Augmenting Train Image: " + img)

        image = self.resize(img, self.dim)
        label = row['lbl']

        if image.shape[2] == 1:
            image = np.dstack([image, image, image])

        # The augmentation calls are kept in their original order in case
        # any of them consume random state.
        self.data[count] = image.astype(np.float32)/255.
        self.data[count+1] = aug.grayscale(image)
        self.data[count+2] = aug.equalize_hist(image)
        horizontal, vertical = aug.reflection(image)
        self.data[count+3] = horizontal
        self.data[count+4] = vertical
        self.data[count+5] = aug.gaussian(image)
        self.data[count+6] = aug.translate(image)
        self.data[count+7] = aug.shear(image)
        self.data[count+8] = aug.rotation(image)

        # All nine samples share one label: encode it once and broadcast
        # it over the nine label rows.
        self.labels[count:count + samples_per_image] = \
            tf.keras.utils.to_categorical(label, num_classes=2)

        if label == 0:
            neg_count += samples_per_image
        else:
            pos_count += samples_per_image
        count += samples_per_image

    # The arrays were allocated for every row of df; skipped test images
    # would otherwise leave all-zero samples with all-zero labels at the
    # end of the dataset.
    self.data = self.data[:count]
    self.labels = self.labels[:count]

    # NOTE(review): pshuffle is not defined in the visible part of this
    # file - confirm it exists on the class.
    self.pshuffle()
    self.convert_data()

    self.Helpers.logger.info("Raw data: " + str(count))
    self.Helpers.logger.info("Raw negative data: " + str(neg_count))
    self.Helpers.logger.info("Raw positive data: " + str(pos_count))
    self.Helpers.logger.info("Augmented data: " + str(self.data.shape))
    self.Helpers.logger.info("Labels: " + str(self.labels.shape))

    self.get_split()
class Data():
    """ Data Class

    Data helper class for the Paper 1 Evaluation.

    Collects image paths and labels from the configured training
    directory, loads (and optionally augments) the images, one hot encodes
    the labels and splits everything into training and validation sets.
    """

    def __init__(self, optimizer, do_augmentation=False):
        """ Initializes the Data class.

        Args:
            optimizer: Name used to build the seed configuration key
                ("seed_" + optimizer, optionally + "_augmentation").
            do_augmentation: When equal to False the plain seed/dim
                configuration is used; otherwise an Augmentation helper is
                created and the "_augmentation" seed/dim values are used.
        """

        self.Helpers = Helpers("Data", False)
        self.optimizer = optimizer
        self.do_augmentation = do_augmentation

        # Explicit comparison kept on purpose: only values equal to False
        # select the non-augmented configuration.
        if self.do_augmentation == False:
            self.seed = self.Helpers.confs["cnn"]["data"][
                "seed_" + self.optimizer]
            self.dim = self.Helpers.confs["cnn"]["data"]["dim"]
        else:
            self.Augmentation = Augmentation("cnn", self.optimizer)
            self.seed = self.Helpers.confs["cnn"]["data"][
                "seed_" + self.optimizer + "_augmentation"]
            self.dim = self.Helpers.confs["cnn"]["data"]["dim_augmentation"]

        seed(self.seed)
        random.seed(self.seed)

        self.data = []
        self.labels = []

        self.Helpers.logger.info("Data class initialization complete.")

    def data_and_labels_sort(self):
        """ Sorts the training data and labels for your model.

        Appends a (path, label) tuple to self.data for every file in the
        configured training directory that matches the configured file
        type, then shuffles self.data with the configured seed. The label
        is 0 when the file name contains "_0" and 1 otherwise.
        """

        data_confs = self.Helpers.confs["cnn"]["data"]
        data_dir = pathlib.Path(data_confs["train_dir"])

        # Directory listing order is not guaranteed, so sort before the
        # seeded shuffle; otherwise the same seed can yield different
        # orders on different machines.
        image_paths = sorted(data_dir.glob('*' + data_confs["file_type"]))

        neg_count = 0
        pos_count = 0

        for image_path in image_paths:
            label = 0 if "_0" in image_path.name else 1
            if label == 0:
                neg_count += 1
            else:
                pos_count += 1
            self.data.append((str(image_path), label))

        random.Random(self.seed).shuffle(self.data)

        self.Helpers.logger.info("All data: " + str(len(image_paths)))
        self.Helpers.logger.info("Positive data: " + str(pos_count))
        self.Helpers.logger.info("Negative data: " + str(neg_count))

    def _load_image(self, path):
        """ Loads an image resized to self.dim with three channels.

        An image whose third axis has length 1 is stacked three times so
        every returned image has three channels.
        """

        image = self.resize(path, self.dim)
        if image.shape[2] == 1:
            image = np.dstack([image, image, image])
        return image

    def data_and_labels_prepare(self):
        """ Prepares the training data for your model.

        Replaces the (path, label) tuples in self.data with the loaded
        images scaled to [0, 1], appends the labels to self.labels, then
        converts the data to a numpy array and one hot encodes the labels.
        """

        images = []
        for fpath, label in self.data:
            image = self._load_image(str(fpath))
            self.labels.append(label)
            images.append(image.astype(np.float32) / 255.)
        self.data = images

        self.convert_data()
        self.encode_labels()

        self.Helpers.logger.info("All data: " + str(self.data.shape))
        self.Helpers.logger.info("All Labels: " + str(self.labels.shape))

    def data_and_labels_augmentation_prepare(self):
        """ Prepares the augmented training data for your model.

        For every (path, label) tuple in self.data the loaded image scaled
        to [0, 1] and its augmented variants are collected, all sharing
        the label of the source image. self.data and self.labels are then
        replaced by the collected samples, converted and encoded.
        """

        augmented_data = []
        augmented_labels = []

        for fpath, label in self.data:
            image = self._load_image(str(fpath))

            # The augmentation calls are kept in their original order in
            # case any of them consume random state.
            augmented_data.append(image.astype(np.float32) / 255.)
            augmented_data.append(self.Augmentation.grayscale(image))
            augmented_data.append(self.Augmentation.equalize_hist(image))
            horizontal, vertical = self.Augmentation.reflection(image)
            augmented_data.append(horizontal)
            augmented_data.append(vertical)
            augmented_data.append(self.Augmentation.gaussian(image))
            augmented_data.append(self.Augmentation.translate(image))
            augmented_data.append(self.Augmentation.shear(image))

            # One label for each of the eight samples added above.
            augmented_labels.extend([label] * 8)

            # rotation receives the accumulators and returns them; it
            # presumably appends its own sample(s) and label(s).
            augmented_data, augmented_labels = self.Augmentation.rotation(
                image, label, augmented_data, augmented_labels)

        self.data = augmented_data
        self.labels = augmented_labels

        self.convert_data()
        self.encode_labels()

        self.Helpers.logger.info("Augmented data: " + str(self.data.shape))
        self.Helpers.logger.info("All Labels: " + str(self.labels.shape))

    def convert_data(self):
        """ Converts the training data to a numpy array. """

        self.data = np.array(self.data)
        self.Helpers.logger.info("Data shape: " + str(self.data.shape))

    def encode_labels(self):
        """ One Hot Encodes the labels. """

        encoder = OneHotEncoder(categories='auto')

        # The encoder is fitted on a single column of labels.
        self.labels = np.reshape(self.labels, (-1, 1))
        self.labels = encoder.fit_transform(self.labels).toarray()
        self.Helpers.logger.info("Labels shape: " + str(self.labels.shape))

    def shuffle(self):
        """ Shuffles the data and labels.

        Inside this method the name shuffle resolves to the module level
        shuffle function, not to this method.
        """

        self.data, self.labels = shuffle(
            self.data, self.labels, random_state=self.seed)

    def get_split(self, test_size=0.255):
        """ Splits the data and labels creating training and validation datasets.

        Args:
            test_size: Passed to train_test_split as test_size. Defaults
                to 0.255.
        """

        self.X_train, self.X_test, self.y_train, self.y_test = \
            train_test_split(self.data, self.labels, test_size=test_size,
                             random_state=self.seed)

        self.Helpers.logger.info("Training data: " + str(self.X_train.shape))
        self.Helpers.logger.info(
            "Training labels: " + str(self.y_train.shape))
        self.Helpers.logger.info("Validation data: " + str(self.X_test.shape))
        self.Helpers.logger.info(
            "Validation labels: " + str(self.y_test.shape))

    def resize(self, path, dim):
        """ Resizes an image to the provided dimensions (dim).

        Args:
            path: Path of the image file to read.
            dim: Width and height of the returned image.
        """

        # NOTE(review): cv2.imread presumably returns None for a missing
        # or unreadable file, in which case cv2.resize fails - confirm
        # whether callers need a clearer error here.
        return cv2.resize(cv2.imread(path), (dim, dim))
def do_im_process(self):
    """ Sorts the training data and labels for your model.

    Loads every file in the configured training directory that matches the
    configured file type, collects the image scaled to [0, 1] together
    with its augmented variants (all labelled 0 when the file name
    contains "_0" and 1 otherwise), then shuffles, converts, encodes and
    splits the result.
    """

    aug = Augmentation()

    data_dir = pathlib.Path(
        self.Helpers.confs["cnn"]["data"]["train_dir"])
    data = list(data_dir.glob(
        '*' + self.Helpers.confs["cnn"]["data"]["file_type"]))

    count = 0
    neg_count = 0
    pos_count = 0

    augmented_data = []
    augmented_labels = []

    for rimage in data:
        fpath = str(rimage)
        fname = os.path.basename(rimage)
        label = 0 if "_0" in fname else 1

        image = self.resize(fpath, self.dim)

        if image.shape[2] == 1:
            image = np.dstack([image, image, image])

        # The augmentation calls are kept in their original order in case
        # any of them consume random state.
        augmented_data.append(image.astype(np.float32)/255.)
        augmented_data.append(aug.grayscale(image))
        augmented_data.append(aug.equalize_hist(image))
        horizontal, vertical = aug.reflection(image)
        augmented_data.append(horizontal)
        augmented_data.append(vertical)
        augmented_data.append(aug.gaussian(image))
        augmented_data.append(aug.translate(image))
        augmented_data.append(aug.shear(image))

        # One label for each of the eight samples added above.
        augmented_labels.extend([label] * 8)

        # Rebind the accumulators to whatever rotation returns so no
        # sample is lost if it returns new lists instead of mutating the
        # ones passed in.
        augmented_data, augmented_labels = aug.rotation(
            image, label, augmented_data, augmented_labels)

        if label == 0:
            neg_count += 9
        else:
            pos_count += 9
        count += 9

    self.data = augmented_data
    self.labels = augmented_labels

    self.shuffle()
    self.convert_data()
    self.encode_labels()

    self.Helpers.logger.info("Raw data: " + str(count))
    self.Helpers.logger.info("Raw negative data: " + str(neg_count))
    self.Helpers.logger.info("Raw positive data: " + str(pos_count))
    self.Helpers.logger.info("Augmented data: " + str(self.data.shape))
    self.Helpers.logger.info("Labels: " + str(self.labels.shape))

    self.get_split()