def load_data(self, original_scale=False):
    """Load the MNIST archives and build train/validation/test splits on ``self``.

    The four ``*-ubyte.gz`` archives are located by string-formatting their
    file names onto ``self.data_path``, so that attribute is used as a plain
    prefix (no path separator is inserted here).

    If ``Cfg.ad_experiment`` is set, the digits are divided into a "normal"
    and an "outlier" group (``Cfg.mnist_normal`` / ``Cfg.mnist_outlier``,
    where ``-1`` selects every digit except the other group's digit), the
    number of training outliers is capped according to ``Cfg.out_frac``, a
    fraction ``Cfg.mnist_val_frac`` of each group is held out for validation,
    and the training set is trimmed to a whole number of batches.
    Otherwise the first ``self.n_val`` samples of a random permutation form
    the validation set and the rest the training set.

    Args:
        original_scale: When true, the preprocessing pipeline at the end of
            this method (rescaling, optional GCN / ZCA whitening / PCA) is
            skipped.

    Side effects:
        Sets ``self._X_train``, ``self._y_train``, ``self._X_val``,
        ``self._y_val``, ``self._X_test`` and ``self._y_test``. In the
        anomaly-detection branch it also sets ``self.n_train``,
        ``self.n_val``, ``self.n_test`` and ``Cfg.n_batches``. Reseeds
        NumPy's global random generator with ``self.seed``.
    """
    print("[INFO: ] Loading data...")

    X = load_mnist_images('%strain-images-idx3-ubyte.gz' % self.data_path)
    y = load_mnist_labels('%strain-labels-idx1-ubyte.gz' % self.data_path)
    X_test = load_mnist_images('%st10k-images-idx3-ubyte.gz' % self.data_path)
    y_test = load_mnist_labels('%st10k-labels-idx1-ubyte.gz' % self.data_path)

    if Cfg.ad_experiment:
        # Set normal and anomalous classes; -1 means "all digits but the
        # one configured for the other group".
        if Cfg.mnist_normal == -1:
            normal = list(range(0, 10))
            normal.remove(Cfg.mnist_outlier)
        else:
            normal = [Cfg.mnist_normal]

        if Cfg.mnist_outlier == -1:
            outliers = list(range(0, 10))
            outliers.remove(Cfg.mnist_normal)
        else:
            outliers = [Cfg.mnist_outlier]

        print("[INFO:] The label of outlier points are ", Cfg.mnist_outlier)
        print("[INFO:] The number of outlier points are ", len(outliers))
        print("[INFO:] The label of normal points are ", Cfg.mnist_normal)

        # Extract normal and anomalous samples from the training archive.
        X_norm, X_out, y_norm, y_out = extract_norm_and_out(
            X, y, normal=normal, outlier=outliers)

        # Number of outliers to keep so that they make up roughly
        # Cfg.out_frac of (normal + outlier) samples.
        n_norm = len(y_norm)
        n_out = int(np.ceil(Cfg.out_frac * n_norm / (1 - Cfg.out_frac)))

        # Shuffle to obtain random validation splits. The seed that is
        # logged is the one that is actually handed to NumPy.
        print("[INFO:] Random Seed used is ", self.seed)
        np.random.seed(self.seed)
        perm_norm = np.random.permutation(len(y_norm))
        perm_out = np.random.permutation(len(y_out))

        # Split into training and validation indices.
        n_norm_split = int(Cfg.mnist_val_frac * n_norm)
        n_out_split = int(Cfg.mnist_val_frac * n_out)
        kept_out = perm_out[:n_out]
        train_norm_idx = perm_norm[n_norm_split:]
        val_norm_idx = perm_norm[:n_norm_split]
        train_out_idx = kept_out[n_out_split:]
        val_out_idx = kept_out[:n_out_split]

        self._X_train = np.concatenate(
            (X_norm[train_norm_idx], X_out[train_out_idx]))
        self._y_train = np.append(y_norm[train_norm_idx], y_out[train_out_idx])
        self._X_val = np.concatenate(
            (X_norm[val_norm_idx], X_out[val_out_idx]))
        self._y_val = np.append(y_norm[val_norm_idx], y_out[val_out_idx])

        # Shuffle data (since batches are extracted block-wise).
        self.n_train = len(self._y_train)
        self.n_val = len(self._y_val)
        perm_train = np.random.permutation(self.n_train)
        perm_val = np.random.permutation(self.n_val)
        self._X_train = self._X_train[perm_train]
        self._y_train = self._y_train[perm_train]
        # The validation permutation must index the validation arrays;
        # indexing the training arrays here would replace the held-out set
        # with training samples.
        self._X_val = self._X_val[perm_val]
        self._y_val = self._y_val[perm_val]

        # Subset the train set so that every batch has the same size.
        # Floor division is required: true division would leave n_train
        # unchanged (and turn it into a float).
        # NOTE(review): with fewer samples than Cfg.batch_size this yields
        # an empty training set.
        self.n_train = int((self.n_train // Cfg.batch_size) * Cfg.batch_size)
        subset = np.random.choice(
            len(self._X_train), self.n_train, replace=False)
        self._X_train = self._X_train[subset]
        self._y_train = self._y_train[subset]

        # Adjust number of batches.
        Cfg.n_batches = int(np.ceil(self.n_train * 1. / Cfg.batch_size))

        # Test set: same class grouping applied to the test archive.
        X_norm, X_out, y_norm, y_out = extract_norm_and_out(
            X_test, y_test, normal=normal, outlier=outliers)
        self._X_test = np.concatenate((X_norm, X_out))
        self._y_test = np.append(y_norm, y_out)
        perm_test = np.random.permutation(len(self._y_test))
        self._X_test = self._X_test[perm_test]
        self._y_test = self._y_test[perm_test]
        self.n_test = len(self._y_test)
    else:
        # Split into training, validation, and test sets.
        np.random.seed(self.seed)
        perm = np.random.permutation(len(X))

        self._X_train = X[perm[self.n_val:]]
        self._y_train = y[perm[self.n_val:]]
        self._X_val = X[perm[:self.n_val]]
        self._y_val = y[perm[:self.n_val]]
        self._X_test = X_test
        self._y_test = y_test

    # Normalize data (if original scale should not be preserved).
    # NOTE(review): the nesting below was reconstructed from a source whose
    # indentation was lost; all steps are assumed to be gated by
    # ``original_scale`` - confirm.
    if not original_scale:
        # Simple rescaling to [0,1]. The return value is not used, so the
        # helper is presumably operating in place - verify.
        normalize_data(self._X_train, self._X_val, self._X_test,
                       scale=np.float32(255))

        # Global contrast normalization.
        if Cfg.gcn:
            global_contrast_normalization(
                self._X_train, self._X_val, self._X_test,
                scale=Cfg.unit_norm_used)

        # ZCA whitening.
        if Cfg.zca_whitening:
            self._X_train, self._X_val, self._X_test = zca_whitening(
                self._X_train, self._X_val, self._X_test)

        # Rescale to [0,1] (w.r.t. min and max in train data).
        rescale_to_unit_interval(self._X_train, self._X_val, self._X_test)

        # PCA.
        if Cfg.pca:
            self._X_train, self._X_val, self._X_test = pca(
                self._X_train, self._X_val, self._X_test, 0.95)

    print("[INFO: ] Data loaded.")
def load_data(self, original_scale=True):
    """Load traffic-sign images and assemble the train/test arrays on ``self``.

    Steps, in order:

    1. Read the images returned by ``readTrafficSigns`` for label 14 and
       split a seeded random permutation into 100 held-out normal samples
       and the remaining training samples.
    2. Load ``Images_150.npy`` / ``Labels_150.npy`` from ``self.data_path``
       and keep the images whose label equals 1 as adversarial samples.
    3. Shuffle, then rebuild a training set (capped at 780 samples, all
       labelled 0) and a test set made of normal samples (label 0) plus at
       most 100 adversarial samples (label 1).
    4. Optionally run the preprocessing pipeline (skipped when
       ``original_scale`` is true).
    5. Move axis 1 to the last position, divide by 255, and merge all
       label-0 samples with a capped number of label-1 samples into one
       array that is stored as both the training and the test set.

    Args:
        original_scale: When true (the default), the preprocessing pipeline
            (rescaling, optional GCN / ZCA whitening / PCA) is skipped.

    Side effects:
        Sets ``self.seed`` from ``Cfg.seed`` and reseeds NumPy's global
        random generator. Sets ``self._X_train``, ``self._y_train``,
        ``self._X_val``, ``self._y_val`` (empty placeholders),
        ``self._X_test``, ``self._y_test``, ``self._X_test_beforegcn``,
        ``self._y_test_beforegcn``, ``self.n_train``, ``self.n_test`` and
        ``Cfg.n_batches``; the three counters describe the final arrays.
    """
    print("[INFO:] Loading data...")

    # Get train data.
    X = readTrafficSigns(rootpath=self.data_path, which_set="train", label=14)

    # Adopt the configured seed first so the value that is logged is the one
    # that actually drives the permutations below.
    self.seed = Cfg.seed
    print("The random seed used in the experiment is ", self.seed)
    np.random.seed(self.seed)

    # Hold out 100 randomly chosen samples as normal test data.
    perm = np.random.permutation(len(X))
    X_test_norm = X[perm[:100], ...]
    self._X_train = X[perm[100:], ...]
    self.n_train = len(self._X_train)
    self._y_train = np.zeros(self.n_train, dtype=np.uint8)

    # Load (adversarial) test data; only entries labelled 1 are used.
    print("[INFO:] Loading adversarial data...")
    X_test_adv = np.load(self.data_path + "/Images_150.npy")
    labels_adv = np.load(self.data_path + "/Labels_150.npy")
    adv_images = X_test_adv[labels_adv == 1]

    self._X_test = np.concatenate(
        (X_test_norm, adv_images), axis=0).astype(np.float32)
    # One label per selected image, so images and labels always have the
    # same length even if the label file contains values other than 0/1.
    self._y_test = np.concatenate(
        (np.zeros(len(X_test_norm), dtype=np.uint8),
         np.ones(len(adv_images), dtype=np.uint8)),
        axis=0)
    self.n_test = len(self._X_test)

    # Since the val set is referenced at some points, initialize empty
    # arrays for it.
    self._X_val = np.empty(shape=(0, 3, 32, 32), dtype=np.float32)
    self._y_val = np.empty(shape=(0), dtype=np.uint8)

    # Shuffle (reseeding makes this step independent of the split above).
    np.random.seed(self.seed)
    perm_train = np.random.permutation(self.n_train)
    perm_test = np.random.permutation(self.n_test)
    self._X_train = self._X_train[perm_train, ...]
    self._y_train = self._y_train[perm_train]
    self._X_test = self._X_test[perm_test, ...]
    self._y_test = self._y_test[perm_test]

    # Normal test samples: the first 170 training samples followed by up to
    # 100 of the held-out normal samples.
    positiveSamples_test = self._X_test[self._y_test == 0][0:100]
    positiveSamples_test = np.concatenate(
        (self._X_train[0:170], positiveSamples_test))
    # Anomalous test samples: up to 100 adversarial images.
    negativeSamples_test = self._X_test[self._y_test == 1][0:100]

    # Training set: previous training samples topped up with the normal
    # test samples, capped at 780 and labelled 0 throughout.
    self._X_train = np.concatenate(
        (self._X_train, positiveSamples_test))[0:780]
    self._y_train = np.zeros(len(self._X_train))

    self._X_test = np.concatenate(
        (positiveSamples_test, negativeSamples_test))
    self._y_test = np.concatenate(
        (np.zeros(len(positiveSamples_test)),
         np.ones(len(negativeSamples_test))))

    print("[INFO:] Negative Y_test labels", len(negativeSamples_test))
    print("[INFO:] Positive Y_test labels", len(positiveSamples_test))

    # Adjust the counters and number of batches to the rebuilt arrays (the
    # sizes recorded after the first split no longer apply).
    self.n_train = len(self._X_train)
    self.n_test = len(self._X_test)
    Cfg.n_batches = int(np.ceil(self.n_train * 1. / Cfg.batch_size))

    # Normalize data (if original scale should not be preserved).
    # NOTE(review): the nesting below was reconstructed from a source whose
    # indentation was lost; all steps are assumed to be gated by
    # ``original_scale`` - confirm.
    if not original_scale:
        # Simple rescaling to [0,1].
        normalize_data(self._X_train, self._X_val, self._X_test,
                       scale=np.float32(255))

        # Global contrast normalization.
        if Cfg.gcn:
            global_contrast_normalization(
                self._X_train, self._X_val, self._X_test,
                scale=Cfg.unit_norm_used)

        # ZCA whitening.
        if Cfg.zca_whitening:
            self._X_train, self._X_val, self._X_test = zca_whitening(
                self._X_train, self._X_val, self._X_test)

        # Rescale to [0,1] (w.r.t. min and max in train data).
        rescale_to_unit_interval(self._X_train, self._X_val, self._X_test)

        # PCA.
        if Cfg.pca:
            self._X_train, self._X_val, self._X_test = pca(
                self._X_train, self._X_val, self._X_test, 0.95)

    flush_last_line()

    # Make sure the axis dimensions are aligned for training convolutional
    # autoencoders: axis 1 is moved to the last position.
    # NOTE(review): the division by 255 also runs when the preprocessing
    # branch above was taken; if normalize_data already rescales to [0,1]
    # the data would be scaled twice - confirm intended.
    self._X_train = np.moveaxis(self._X_train, 1, 3) / 255.0
    self._X_test = np.moveaxis(self._X_test, 1, 3) / 255.0

    X_train, y_train = self._X_train, self._y_train
    X_test, y_test = self._X_test, self._y_test
    print("X_train,X_test====>", X_train.shape, X_test.shape)

    # Split both sets by label: 0 is treated as positive (normal), 1 as
    # negative (anomalous).
    trainXPos = X_train[y_train == 0]
    testXPos = X_test[y_test == 0]
    trainXNeg = X_train[y_train == 1]
    testXNeg = X_test[y_test == 1]
    print("trainXPos,testXPos", trainXPos.shape, testXPos.shape)

    # Combine the positive data, then the negative data.
    X_trainPOS = np.concatenate((trainXPos, testXPos))
    y_trainPOS = np.zeros(len(X_trainPOS))
    X_trainNEG = np.concatenate((trainXNeg, testXNeg))

    # Cap the anomalies at 10% of the number of positive samples.
    num_of_anomalies = int(0.1 * len(X_trainPOS))
    X_trainNEG = X_trainNEG[0:num_of_anomalies]
    y_trainNEG = np.ones(len(X_trainNEG))

    X_train = np.concatenate((X_trainPOS, X_trainNEG))
    y_train = np.concatenate((y_trainPOS, y_trainNEG))

    # The merged array is deliberately stored as both the training and the
    # test set.
    self._X_train = X_train
    self._y_train = y_train
    self._X_test = X_train
    self._y_test = y_train
    self._X_test_beforegcn = np.reshape(X_train, (len(X_train), 32, 32, 3))
    self._y_test_beforegcn = y_train

    # Keep the public counters consistent with the final arrays.
    self.n_train = len(self._X_train)
    self.n_test = len(self._X_test)
    Cfg.n_batches = int(np.ceil(self.n_train * 1. / Cfg.batch_size))

    print("[INFO:] The shape of self.data._X_train", self._X_train.shape)
    print("[INFO:] The shape of self.data._X_test", self._X_test.shape)

    print("Data loaded.")