Example #1
    def load_data(self):

        print("Loading data...")

        X = load_mnist_images('%strain-images-idx3-ubyte.gz' % self.data_path)
        y = load_mnist_labels('%strain-labels-idx1-ubyte.gz' % self.data_path)
        X_test = load_mnist_images('%st10k-images-idx3-ubyte.gz' %
                                   self.data_path)
        y_test = load_mnist_labels('%st10k-labels-idx1-ubyte.gz' %
                                   self.data_path)

        # split into training and validation sets
        np.random.seed(self.seed)
        perm = np.random.permutation(len(X))

        self._X_train = X[perm[self.n_val:]]
        self._y_train = y[perm[self.n_val:]]
        self._X_val = X[perm[:self.n_val]]
        self._y_val = y[perm[:self.n_val]]

        self._X_test = X_test
        self._y_test = y_test

        # normalize data (divide by fixed value)
        normalize_data(self._X_train,
                       self._X_val,
                       self._X_test,
                       scale=np.float32(256))

        flush_last_line()
        print("Data loaded.")
Example #2
def epoch_imagenet(nnet, layer):

    n_batches = Cfg.n_batches
    count = 0

    for batch in nnet.data.get_epoch_train():

        # unpack batch arguments
        X, y, idx = batch

        # transfer primal variable to gpu
        host_to_gpu(layer, idx)

        # perform BCFW update
        layer.svm_update(X, y)

        # transfer primal variable back to host
        gpu_to_host(layer, idx)

        count += 1
        if count % 50 == 0:
            check_dual(nnet, layer)

            if len(nnet.log['dual_objective']) > 0:
                dual_obj = nnet.log['dual_objective'][-1]
                flush_last_line()
                print("Dual objective: %g (it %i out of %i)" %
                      (dual_obj, count, n_batches))
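flush_last_line is called after most progress prints in these examples but is not listed. A plausible sketch using ANSI escape codes (an assumption; the helper may be implemented differently) is:

import sys


def flush_last_line(to_flush=1):
    # move the cursor up and erase the previous line(s) so the next print
    # overwrites the old progress message
    for _ in range(to_flush):
        sys.stdout.write("\x1b[1A\x1b[2K")
    sys.stdout.flush()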
Example #3
    def load_data(self):

        print("Loading data...")

        # load training data
        X, y = [], []
        count = 1
        filename = '%s/data_batch_%i' % (self.data_path, count)
        while os.path.exists(filename):
            with open(filename, 'rb') as f:
                batch = pickle.load(f)
            X.append(batch['data'])
            y.append(batch['labels'])
            count += 1
            filename = '%s/data_batch_%i' % (self.data_path, count)

        # reshape data and cast them properly
        X = np.concatenate(X).reshape(-1, 3, 32, 32).astype(np.float32)
        y = np.concatenate(y).astype(np.int32)

        # load test set
        path = '%s/test_batch' % self.data_path
        with open(path, 'rb') as f:
            batch = pickle.load(f)

        # reshaping and casting for test data
        X_test = batch['data'].reshape(-1, 3, 32, 32).astype(np.float32)
        y_test = np.array(batch['labels'], dtype=np.int32)

        # split into training and validation sets with stored seed
        np.random.seed(self.seed)
        perm = np.random.permutation(len(X))

        self._X_train = X[perm[self.n_val:]]
        self._y_train = y[perm[self.n_val:]]
        self._X_val = X[perm[:self.n_val]]
        self._y_val = y[perm[:self.n_val]]

        self._X_test = X_test
        self._y_test = y_test

        # center data per pixel (mean from X_train)
        center_data(self._X_train, self._X_val, self._X_test, mode="per pixel")

        # normalize data per pixel (std from X_train)
        normalize_data(self._X_train,
                       self._X_val,
                       self._X_test,
                       mode="per pixel")

        flush_last_line()
        print("Data loaded.")
Example #4
    def load_weights(self, nnet):

        print("Loading weights...")
        # pickle files must be opened in binary mode
        pickled_file = open('../data/imagenet/vgg16.pkl', "rb")
        pickled_dict = pickle.load(pickled_file)

        # loading weights
        pickled_weights = pickled_dict['param values']
        for layer in nnet.trainable_layers:
            layer.W.set_value(pickled_weights.pop(0))
            layer.b.set_value(pickled_weights.pop(0))

        pickled_file.close()
        flush_last_line()
        print("Weights loaded.")
Example #5
def test_epoch_on_each_layers(nnet):

    for layer in nnet.trainable_layers:
        if layer.isconv:
            continue

        layer.initialize_primal(nnet)
        count = 0
        for batch in nnet.data.get_epoch_train():

            # unpack batch arguments
            X, y, idx = batch

            # transfer primal variable to gpu
            host_to_gpu(layer, idx)

            # perform BCFW update
            layer.svm_update(X, y)

            # transfer primal variable back to host
            gpu_to_host(layer, idx)

            # run only five iterations per layer
            count += 1
            flush_last_line()
            if count == 5:
                print("OK on layer %s." % layer.name)
                break

        layer.use_average()

    count = 0
    for batch in nnet.data.get_epoch_val():
        print("Validation batch %i" % count)
        inputs, targets, _ = batch
        err, acc = nnet.lasagne_val_fn(inputs, targets)

        count += 1
        flush_last_line()
        if count == 5:
            print("OK for validation.")
            break

    print("All good.")

    sys.exit()
Example #6
    def load_data(self):

        print("Loading data...")

        train_file = self.data_path + 'train'
        test_file = self.data_path + 'test'

        train_dict = pickle.load(open(train_file, 'rb'))
        test_dict = pickle.load(open(test_file, 'rb'))

        X = train_dict["data"]
        X = np.concatenate(X).reshape(-1, 3, 32, 32).astype(np.float32)
        y = np.array(train_dict["fine_labels"], dtype=np.int32)

        X_test = test_dict["data"]
        y_test = np.array(test_dict["fine_labels"], dtype=np.int32)
        X_test = X_test.reshape(-1, 3, 32, 32).astype(np.float32)

        # split into training and validation sets
        np.random.seed(self.seed)
        perm = np.random.permutation(len(X))

        self._X_train = X[perm[self.n_val:]]
        self._y_train = y[perm[self.n_val:]]
        self._X_val = X[perm[:self.n_val]]
        self._y_val = y[perm[:self.n_val]]

        self._X_test = X_test
        self._y_test = y_test

        # center data per pixel (mean from X_train)
        center_data(self._X_train, self._X_val, self._X_test, mode="per pixel")

        # normalize data per pixel (std from X_train)
        normalize_data(self._X_train,
                       self._X_val,
                       self._X_test,
                       mode="per pixel")

        flush_last_line()
        print("Data loaded.")
Example #7
    def compile_updates(self):
        """ create network from architecture given in modules (determined by dataset)
        create Theano compiled functions
        """

        opt.sgd.updates.create_update(self)

        # avoid unnecessary compilation if not using LWSVM
        if self.solver not in ('svm', 'bpfw'):
            return

        if self.solver == 'bpfw':
            self.update_bpfw = opt.bpfw.updates.compile_update_bpfw(self)
            return

        if Cfg.store_on_gpu:
            from opt.svm.full_gpu import compile_update_conv,\
                compile_update_dense, compile_update_svm
        else:
            from opt.svm.part_gpu import compile_update_conv,\
                compile_update_dense, compile_update_svm

        for layer in self.trainable_layers:

            print("Compiling updates for {}...".format(layer.name))

            layer.hinge_avg = layers.fun.compile_hinge_avg(self, layer)

            if layer.isconv:
                layer.svm_update = compile_update_conv(self, layer)

            if layer.isdense:
                layer.svm_update = compile_update_dense(self, layer)

            if layer.issvm:
                layer.svm_update = compile_update_svm(self, layer)

            flush_last_line()

        print("Updates compiled.")
Example #8
    def load_data(self, original_scale=False):

        print("Loading data...")

        # load training data
        X, y = [], []
        count = 1
        filename = '%s/data_batch_%i' % (self.data_path, count)
        while os.path.exists(filename):
            with open(filename, 'rb') as f:
                batch = pickle.load(f)
            X.append(batch['data'])
            y.append(batch['labels'])
            count += 1
            filename = '%s/data_batch_%i' % (self.data_path, count)

        # reshape data and cast them properly
        X = np.concatenate(X).reshape(-1, 3, 32, 32).astype(np.float32)
        y = np.concatenate(y).astype(np.int32)

        # load test set
        path = '%s/test_batch' % self.data_path
        with open(path, 'rb') as f:
            batch = pickle.load(f)

        # reshaping and casting for test data
        X_test = batch['data'].reshape(-1, 3, 32, 32).astype(np.float32)
        y_test = np.array(batch['labels'], dtype=np.int32)

        if Cfg.ad_experiment:

            normal = eval(Cfg.cifar10_normal)
            outliers = eval(Cfg.cifar10_outlier)

            # extract normal and anomalous class
            X_norm, X_out, y_norm, y_out, _, _ = extract_norm_and_out(
                X, y, normal=normal, outlier=outliers)

            # reduce outliers to fraction defined
            n_norm = len(y_norm)
            n_out = int(np.ceil(Cfg.out_frac * n_norm / (1 - Cfg.out_frac)))

            # shuffle to obtain random validation splits
            np.random.seed(self.seed)
            perm_norm = np.random.permutation(len(y_norm))
            perm_out = np.random.permutation(len(y_out))

            # split into training and validation set
            n_norm_split = int(Cfg.cifar10_val_frac * n_norm)
            n_out_split = int(Cfg.cifar10_val_frac * n_out)
            self._X_train = np.concatenate(
                (X_norm[perm_norm[n_norm_split:]],
                 X_out[perm_out[:n_out][n_out_split:]]))
            self._y_train = np.append(y_norm[perm_norm[n_norm_split:]],
                                      y_out[perm_out[:n_out][n_out_split:]])
            self._X_val = np.concatenate(
                (X_norm[perm_norm[:n_norm_split]],
                 X_out[perm_out[:n_out][:n_out_split]]))
            self._y_val = np.append(y_norm[perm_norm[:n_norm_split]],
                                    y_out[perm_out[:n_out][:n_out_split]])

            # shuffle data (since batches are extracted block-wise)
            self.n_train = len(self._y_train)
            self.n_val = len(self._y_val)
            perm_train = np.random.permutation(self.n_train)
            perm_val = np.random.permutation(self.n_val)
            self._X_train = self._X_train[perm_train]
            self._y_train = self._y_train[perm_train]
            self._X_val = self._X_val[perm_val]
            self._y_val = self._y_val[perm_val]

            # Subset train set such that we only get batches of the same size
            self.n_train = (self.n_train // Cfg.batch_size) * Cfg.batch_size
            subset = np.random.choice(len(self._X_train),
                                      self.n_train,
                                      replace=False)
            self._X_train = self._X_train[subset]
            self._y_train = self._y_train[subset]

            # Adjust number of batches
            Cfg.n_batches = int(np.ceil(self.n_train * 1. / Cfg.batch_size))

            # test set
            X_norm, X_out, y_norm, y_out, idx_norm, idx_out = extract_norm_and_out(
                X_test, y_test, normal=normal, outlier=outliers)

            # store original test labels for visualisation
            yo_norm = y_test[idx_norm]
            yo_out = y_test[idx_out]
            self._yo_test = np.append(yo_norm, yo_out)

            self._X_test = np.concatenate((X_norm, X_out))
            self._y_test = np.append(y_norm, y_out)
            perm_test = np.random.permutation(len(self._y_test))
            self._X_test = self._X_test[perm_test]
            self._y_test = self._y_test[perm_test]
            self._yo_test = self._yo_test[perm_test]
            self.n_test = len(self._y_test)

        else:
            # split into training and validation sets with stored seed
            np.random.seed(self.seed)
            perm = np.random.permutation(len(X))

            self._X_train = X[perm[self.n_val:]]
            self._y_train = y[perm[self.n_val:]]
            self._X_val = X[perm[:self.n_val]]
            self._y_val = y[perm[:self.n_val]]

            self._X_test = X_test
            self._y_test = y_test

        # normalize data (if original scale should not be preserved)
        if not original_scale:

            # simple rescaling to [0,1]
            normalize_data(self._X_train,
                           self._X_val,
                           self._X_test,
                           scale=np.float32(255))

            # global contrast normalization
            if Cfg.gcn:
                global_contrast_normalization(self._X_train,
                                              self._X_val,
                                              self._X_test,
                                              scale=Cfg.unit_norm_used)

            # ZCA whitening
            if Cfg.zca_whitening:
                self._X_train, self._X_val, self._X_test = zca_whitening(
                    self._X_train, self._X_val, self._X_test)

            # rescale to [0,1] (w.r.t. min and max in train data)
            rescale_to_unit_interval(self._X_train, self._X_val, self._X_test)

            # PCA
            if Cfg.pca:
                self._X_train, self._X_val, self._X_test = pca(
                    self._X_train, self._X_val, self._X_test, 0.95)

        flush_last_line()
        print("Data loaded.")
Example #9
    def load_data(self, original_scale=False):

        print("Loading data...")

        X = load_mnist_images('%strain-images-idx3-ubyte.gz' % self.data_path)
        y = load_mnist_labels('%strain-labels-idx1-ubyte.gz' % self.data_path)
        X_test = load_mnist_images('%st10k-images-idx3-ubyte.gz' %
                                   self.data_path)
        y_test = load_mnist_labels('%st10k-labels-idx1-ubyte.gz' %
                                   self.data_path)

        if Cfg.ad_experiment:
            X_norm, y_norm = get_norm_for_mnist()

            # shuffle to obtain random validation splits
            np.random.seed(self.seed)
            perm_norm = np.random.permutation(len(y_norm))
            n_norm = len(y_norm)

            # split into training and validation set
            n_norm_split = int(Cfg.mnist_val_frac * n_norm)
            self._X_train = X_norm[perm_norm[n_norm_split:]]
            self._y_train = y_norm[perm_norm[n_norm_split:]]
            self._X_val = X_norm[perm_norm[:n_norm_split]]
            self._y_val = y_norm[perm_norm[:n_norm_split]]

            # shuffle data (since batches are extracted block-wise)
            self.n_train = len(self._y_train)
            self.n_val = len(self._y_val)
            perm_train = np.random.permutation(self.n_train)
            perm_val = np.random.permutation(self.n_val)
            self._X_train = self._X_train[perm_train]
            self._y_train = self._y_train[perm_train]
            self._X_val = self._X_val[perm_val]
            self._y_val = self._y_val[perm_val]

            print("Number of data in training: " + str(self.n_train))
            print("Number of data in validation: " + str(self.n_val))

            # Subset train set such that we only get batches of the same size
            self.n_train = (self.n_train // Cfg.batch_size) * Cfg.batch_size
            subset = np.random.choice(len(self._X_train),
                                      self.n_train,
                                      replace=False)
            self._X_train = self._X_train[subset]
            self._y_train = self._y_train[subset]

            # Adjust number of batches
            Cfg.n_batches = int(np.ceil(self.n_train * 1. / Cfg.batch_size))

            # test set
            X_norm, X_out, y_norm, y_out = get_outlier_for_mnist()
            self._X_test = np.concatenate((X_norm, X_out))
            self._y_test = np.append(y_norm, y_out)
            perm_test = np.random.permutation(len(self._y_test))
            self._X_test = self._X_test[perm_test]
            self._y_test = self._y_test[perm_test]
            self.n_test = len(self._y_test)
            print("Number of outlier data in testing: " +
                  str(np.shape(y_out)[0]))
            print("Number of normal data in testing: " +
                  str(np.shape(y_norm)[0]))

        else:
            # split into training, validation, and test sets
            np.random.seed(self.seed)
            perm = np.random.permutation(len(X))

            self._X_train = X[perm[self.n_val:]]
            self._y_train = y[perm[self.n_val:]]
            self._X_val = X[perm[:self.n_val]]
            self._y_val = y[perm[:self.n_val]]
            self._X_test = X_test
            self._y_test = y_test

        # normalize data (if original scale should not be preserved)
        if not original_scale:

            # simple rescaling to [0,1]
            normalize_data(self._X_train,
                           self._X_val,
                           self._X_test,
                           scale=np.float32(255))

            # global contrast normalization
            if Cfg.gcn:
                global_contrast_normalization(self._X_train,
                                              self._X_val,
                                              self._X_test,
                                              scale=Cfg.unit_norm_used)

            # ZCA whitening
            if Cfg.zca_whitening:
                self._X_train, self._X_val, self._X_test = zca_whitening(
                    self._X_train, self._X_val, self._X_test)

            # rescale to [0,1] (w.r.t. min and max in train data)
            rescale_to_unit_interval(self._X_train, self._X_val, self._X_test)

            # PCA
            if Cfg.pca:
                self._X_train, self._X_val, self._X_test = pca(
                    self._X_train, self._X_val, self._X_test, 0.95)

        flush_last_line()
        print("Data loaded.")
Example #10
    def load_data(self, original_scale=False):

        print("Loading data...")

        # get train data
        X = readTrafficSigns(rootpath=self.data_path,
                             which_set="train",
                             label=14)

        # get (normal) test data
        # X_test_norm = readTrafficSigns(rootpath=self.data_path, which_set="test", label=14)
        # sub-sample 100 normal examples for the test set
        np.random.seed(self.seed)
        perm = np.random.permutation(len(X))
        X_test_norm = X[perm[:100], ...]
        self._X_train = X[perm[100:], ...]
        self.n_train = len(self._X_train)
        self._y_train = np.zeros(self.n_train, dtype=np.uint8)

        # load (adversarial) test data
        X_test_adv = np.load(self.data_path + "/Images_150.npy")
        labels_adv = np.load(self.data_path + "/Labels_150.npy")

        self._X_test = np.concatenate(
            (X_test_norm, X_test_adv[labels_adv == 1]),
            axis=0).astype(np.float32)
        self._y_test = np.concatenate(
            (np.zeros(len(X_test_norm), dtype=np.uint8),
             np.ones(int(np.sum(labels_adv)), dtype=np.uint8)),
            axis=0)
        self.n_test = len(self._X_test)

        # since val set is referenced at some points initialize empty np arrays
        self._X_val = np.empty(shape=(0, 3, 32, 32), dtype=np.float32)
        self._y_val = np.empty(shape=(0), dtype=np.uint8)

        # shuffle
        np.random.seed(self.seed)
        perm_train = np.random.permutation(self.n_train)
        perm_test = np.random.permutation(self.n_test)
        self._X_train = self._X_train[perm_train, ...]
        self._y_train = self._y_train[perm_train]
        self._X_test = self._X_test[perm_test, ...]
        self._y_test = self._y_test[perm_test]

        # Adjust number of batches
        Cfg.n_batches = int(np.ceil(self.n_train * 1. / Cfg.batch_size))

        # normalize data (if original scale should not be preserved)
        if not original_scale:

            # simple rescaling to [0,1]
            normalize_data(self._X_train,
                           self._X_val,
                           self._X_test,
                           scale=np.float32(255))

            # global contrast normalization
            if Cfg.gcn:
                global_contrast_normalization(self._X_train,
                                              self._X_val,
                                              self._X_test,
                                              scale=Cfg.unit_norm_used)

            # ZCA whitening
            if Cfg.zca_whitening:
                self._X_train, self._X_val, self._X_test = zca_whitening(
                    self._X_train, self._X_val, self._X_test)

            # rescale to [0,1] (w.r.t. min and max in train data)
            rescale_to_unit_interval(self._X_train, self._X_val, self._X_test)

            # PCA
            if Cfg.pca:
                self._X_train, self._X_val, self._X_test = pca(
                    self._X_train, self._X_val, self._X_test, 0.95)

        flush_last_line()
        print("Data loaded.")
Example #11
    def load_data(self, original_scale=False):

        print("Loading data...")

        X = load_mnist_images('%strain-images-idx3-ubyte.gz' % self.data_path)
        y = load_mnist_labels('%strain-labels-idx1-ubyte.gz' % self.data_path)
        X_test = load_mnist_images('%st10k-images-idx3-ubyte.gz' %
                                   self.data_path)
        y_test = load_mnist_labels('%st10k-labels-idx1-ubyte.gz' %
                                   self.data_path)

        if Cfg.ad_experiment:

            # set normal and anomalous class
            normal = []
            outliers = []

            if Cfg.mnist_normal == -1:
                normal = list(range(0, 10))
                normal.remove(Cfg.mnist_outlier)
            else:
                normal.append(Cfg.mnist_normal)

            if Cfg.mnist_outlier == -1:
                outliers = list(range(0, 10))
                outliers.remove(Cfg.mnist_normal)
            else:
                outliers.append(Cfg.mnist_outlier)

            # extract normal and anomalous class
            X_norm, X_out, y_norm, y_out = extract_norm_and_out(
                X, y, normal=normal, outlier=outliers)

            # reduce outliers to fraction defined
            n_norm = len(y_norm)
            n_out = int(np.ceil(Cfg.out_frac * n_norm / (1 - Cfg.out_frac)))

            # shuffle to obtain random validation splits
            np.random.seed(self.seed)
            perm_norm = np.random.permutation(len(y_norm))
            perm_out = np.random.permutation(len(y_out))

            # split into training and validation set
            n_norm_split = int(Cfg.mnist_val_frac * n_norm)
            n_out_split = int(Cfg.mnist_val_frac * n_out)
            self._X_train = np.concatenate(
                (X_norm[perm_norm[n_norm_split:]],
                 X_out[perm_out[:n_out][n_out_split:]]))
            self._y_train = np.append(y_norm[perm_norm[n_norm_split:]],
                                      y_out[perm_out[:n_out][n_out_split:]])
            self._X_val = np.concatenate(
                (X_norm[perm_norm[:n_norm_split]],
                 X_out[perm_out[:n_out][:n_out_split]]))
            self._y_val = np.append(y_norm[perm_norm[:n_norm_split]],
                                    y_out[perm_out[:n_out][:n_out_split]])

            # shuffle data (since batches are extracted block-wise)
            self.n_train = len(self._y_train)
            self.n_val = len(self._y_val)
            perm_train = np.random.permutation(self.n_train)
            perm_val = np.random.permutation(self.n_val)
            self._X_train = self._X_train[perm_train]
            self._y_train = self._y_train[perm_train]
            self._X_val = self._X_val[perm_val]
            self._y_val = self._y_val[perm_val]

            # Subset train set such that we only get batches of the same size
            self.n_train = (self.n_train // Cfg.batch_size) * Cfg.batch_size
            subset = np.random.choice(len(self._X_train),
                                      int(self.n_train),
                                      replace=False)
            self._X_train = self._X_train[subset]
            self._y_train = self._y_train[subset]

            # Adjust number of batches
            Cfg.n_batches = int(np.ceil(self.n_train * 1. / Cfg.batch_size))

            # test set
            X_norm, X_out, y_norm, y_out = extract_norm_and_out(
                X_test, y_test, normal=normal, outlier=outliers)
            self._X_test = np.concatenate((X_norm, X_out))
            self._y_test = np.append(y_norm, y_out)
            perm_test = np.random.permutation(len(self._y_test))
            self._X_test = self._X_test[perm_test]
            self._y_test = self._y_test[perm_test]
            self.n_test = len(self._y_test)

        else:
            # split into training, validation, and test sets
            np.random.seed(self.seed)
            perm = np.random.permutation(len(X))

            self._X_train = X[perm[self.n_val:]]
            self._y_train = y[perm[self.n_val:]]
            self._X_val = X[perm[:self.n_val]]
            self._y_val = y[perm[:self.n_val]]
            self._X_test = X_test
            self._y_test = y_test

        # normalize data (if original scale should not be preserved)
        if not original_scale:

            # simple rescaling to [0,1]
            normalize_data(self._X_train,
                           self._X_val,
                           self._X_test,
                           scale=np.float32(255))

            # global contrast normalization
            if Cfg.gcn:
                global_contrast_normalization(self._X_train,
                                              self._X_val,
                                              self._X_test,
                                              scale=Cfg.unit_norm_used)

            # ZCA whitening
            if Cfg.zca_whitening:
                self._X_train, self._X_val, self._X_test = zca_whitening(
                    self._X_train, self._X_val, self._X_test)

            # rescale to [0,1] (w.r.t. min and max in train data)
            rescale_to_unit_interval(self._X_train, self._X_val, self._X_test)

            # PCA
            if Cfg.pca:
                self._X_train, self._X_val, self._X_test = pca(
                    self._X_train, self._X_val, self._X_test, 0.95)

        flush_last_line()
        print("Data loaded.")
Example #12
    def load_data(self, original_scale=False):

        print("Loading data...")

        # load normal and outlier data
        self._X_train = [img_to_array(load_img(Cfg.train_folder + filename))
                         for filename in os.listdir(Cfg.train_folder)][:Cfg.n_train]
        self._X_val = [img_to_array(load_img(Cfg.val_folder + filename))
                       for filename in os.listdir(Cfg.val_folder)][:Cfg.n_val]
        n_test_out = Cfg.n_test - Cfg.n_test_in
        _X_test_in = [img_to_array(load_img(Cfg.test_in_folder + filename))
                      for filename in os.listdir(Cfg.test_in_folder)][:Cfg.n_test_in]
        _X_test_out = [img_to_array(load_img(Cfg.test_out_folder + filename))
                       for filename in os.listdir(Cfg.test_out_folder)][:n_test_out]
        _y_test_in = np.zeros((Cfg.n_test_in,), dtype=np.int32)
        _y_test_out = np.ones((n_test_out,), dtype=np.int32)
        self._X_test = np.concatenate([_X_test_in, _X_test_out])
        self._y_test = np.concatenate([_y_test_in, _y_test_out])
        self.out_frac = Cfg.out_frac

        # transpose to channels first
        self._X_train = np.moveaxis(self._X_train, -1, 1)
        self._X_val = np.moveaxis(self._X_val, -1, 1)
        self._X_test = np.moveaxis(self._X_test, -1, 1)


        # cast data properly
        self._X_train = self._X_train.astype(np.float32)
        self._X_val = self._X_val.astype(np.float32)
        self._X_test = self._X_test.astype(np.float32)
        self._y_test = self._y_test.astype(np.int32)

        # Train and val labels are 0, since all are normal class
        self._y_train = np.zeros((len(self._X_train),), dtype=np.int32)
        self._y_val = np.zeros((len(self._X_val),), dtype=np.int32)

        if Cfg.ad_experiment:
            # shuffle to obtain random validation splits
            np.random.seed(self.seed)

            # shuffle data (since batches are extracted block-wise)
            self.n_train = len(self._y_train)
            self.n_val = len(self._y_val)
            perm_train = np.random.permutation(self.n_train)
            perm_val = np.random.permutation(self.n_val)
            self._X_train = self._X_train[perm_train]
            self._y_train = self._y_train[perm_train]
            self._X_val = self._X_val[perm_val]
            self._y_val = self._y_val[perm_val]
            print("Shuffled data")

            # Subset train set such that we only get batches of the same size
            assert self.n_train >= Cfg.batch_size
            self.n_train = (self.n_train // Cfg.batch_size) * Cfg.batch_size
            subset = np.random.choice(len(self._X_train),
                                      self.n_train,
                                      replace=False)
            self._X_train = self._X_train[subset]
            self._y_train = self._y_train[subset]

            # Adjust number of batches
            Cfg.n_batches = int(np.ceil(self.n_train * 1. / Cfg.batch_size))

        # normalize data (if original scale should not be preserved)
        if not original_scale:

            # simple rescaling to [0,1]
            normalize_data(self._X_train,
                           self._X_val,
                           self._X_test,
                           scale=np.float32(255))

            # global contrast normalization
            if Cfg.gcn:
                global_contrast_normalization(self._X_train,
                                              self._X_val,
                                              self._X_test,
                                              scale=Cfg.unit_norm_used)

            # ZCA whitening
            if Cfg.zca_whitening:
                self._X_train, self._X_val, self._X_test = zca_whitening(
                    self._X_train, self._X_val, self._X_test)

            # rescale to [0,1] (w.r.t. min and max in train data)
            rescale_to_unit_interval(self._X_train, self._X_val, self._X_test)

            # PCA
            if Cfg.pca:
                self._X_train, self._X_val, self._X_test = pca(
                    self._X_train, self._X_val, self._X_test, 0.95)

        flush_last_line()
        print("Max pixel value: ", np.amax(self._X_train))
        print("Data loaded.")
Example #13
    def load_data(self, original_scale=False):

        print("[INFO ]: ", "Please wait while ", self.dataset_name,
              " data is being loaded...")

        [X, y] = load_lhc_train_images(self.data_path)
        [X_test, y_test] = load_lhc_test_images(self.data_path)

        if Cfg.ad_experiment:

            # set normal and anomalous class
            normal = [1]
            outliers = [0]

            # extract normal and anomalous class
            X_norm, X_out, y_norm, y_out = extract_norm_and_out(
                X, y, normal=normal, outlier=outliers)

            # reduce outliers to fraction defined
            n_norm = len(y_norm)
            n_out = int(
                np.ceil(
                    float(Cfg.out_frac) * n_norm / (1 - float(Cfg.out_frac))))

            # shuffle to obtain random validation splits
            np.random.seed(self.seed)
            perm_norm = np.random.permutation(len(y_norm))
            perm_out = np.random.permutation(len(y_out))

            # split into training and validation set
            n_norm_split = int(Cfg.lhc_val_frac * n_norm)
            n_out_split = int(Cfg.lhc_val_frac * n_out)
            self._X_train = np.concatenate(
                (X_norm[perm_norm[n_norm_split:]],
                 X_out[perm_out[:n_out][n_out_split:]]))
            self._y_train = np.append(y_norm[perm_norm[n_norm_split:]],
                                      y_out[perm_out[:n_out][n_out_split:]])
            self._X_val = np.concatenate(
                (X_norm[perm_norm[:n_norm_split]],
                 X_out[perm_out[:n_out][:n_out_split]]))
            self._y_val = np.append(y_norm[perm_norm[:n_norm_split]],
                                    y_out[perm_out[:n_out][:n_out_split]])

            # shuffle data (since batches are extracted block-wise)
            self.n_train = len(self._y_train)
            self.n_val = len(self._y_val)
            perm_train = np.random.permutation(self.n_train)
            perm_val = np.random.permutation(self.n_val)
            self._X_train = self._X_train[perm_train]
            self._y_train = self._y_train[perm_train]
            self._X_val = self._X_val[perm_val]
            self._y_val = self._y_val[perm_val]

            # Subset train set such that we only get batches of the same size
            self.n_train = (self.n_train // Cfg.batch_size) * Cfg.batch_size
            subset = np.random.choice(len(self._X_train),
                                      int(self.n_train),
                                      replace=False)
            self._X_train = self._X_train[subset]
            self._y_train = self._y_train[subset]

            # Adjust number of batches
            Cfg.n_batches = int(np.ceil(self.n_train * 1. / Cfg.batch_size))

            # test set
            X_norm, X_out, y_norm, y_out = extract_norm_and_out(
                X_test, y_test, normal=normal, outlier=outliers)
            self._X_test = np.concatenate((X_norm, X_out))
            self._y_test = np.append(y_norm, y_out)
            perm_test = np.random.permutation(len(self._y_test))
            self._X_test = self._X_test[perm_test]
            self._y_test = self._y_test[perm_test]
            self.n_test = len(self._y_test)

        else:
            # split into training, validation, and test sets
            np.random.seed(self.seed)
            perm = np.random.permutation(len(X))

            self._X_train = X[perm[self.n_val:]]
            self._y_train = y[perm[self.n_val:]]
            self._X_val = X[perm[:self.n_val]]
            self._y_val = y[perm[:self.n_val]]
            self._X_test = X_test
            self._y_test = y_test

        # normalize data (if original scale should not be preserved)
        if not original_scale:

            # simple rescaling to [0,1]
            normalize_data(self._X_train,
                           self._X_val,
                           self._X_test,
                           scale=np.float32(255))

            # global contrast normalization
            if Cfg.gcn:
                global_contrast_normalization(self._X_train,
                                              self._X_val,
                                              self._X_test,
                                              scale=Cfg.unit_norm_used)

            # ZCA whitening
            if Cfg.zca_whitening:
                self._X_train, self._X_val, self._X_test = zca_whitening(
                    self._X_train, self._X_val, self._X_test)

            # rescale to [0,1] (w.r.t. min and max in train data)
            rescale_to_unit_interval(self._X_train, self._X_val, self._X_test)

            # PCA
            if Cfg.pca:
                self._X_train, self._X_val, self._X_test = pca(
                    self._X_train, self._X_val, self._X_test, 0.95)

        flush_last_line()
        print("[INFO] : Data loaded.")
Example #14
    def load_data(self, original_scale=False):

        print("Loading data...")

        X, y = load_mobiFall_data('%smobiFall/train_mobiFall' % self.data_path)

        X_test, y_test = load_mobiFall_data('%smobiFall/test_mobiFall' %
                                            self.data_path)

        if Cfg.ad_experiment:

            # set normal and anomalous class
            normal = eval(Cfg.mobiFall_normal)
            outliers = eval(Cfg.mobiFall_outlier)

            # extract normal and anomalous class
            X_norm, X_out, y_norm, y_out, idx_norm, idx_out = extract_norm_and_out(
                X, y, normal=normal, outlier=outliers)

            # reduce outliers to fraction defined
            n_norm = len(y_norm)
            n_out = int(np.ceil(Cfg.out_frac * n_norm / (1 - Cfg.out_frac)))

            # shuffle to obtain random validation splits
            np.random.seed(self.seed)
            perm_norm = np.random.permutation(len(y_norm))
            perm_out = np.random.permutation(len(y_out))

            # split into training and validation set
            n_norm_split = int(Cfg.mobiFall_val_frac * n_norm)
            n_out_split = int(Cfg.mobiFall_val_frac * n_out)
            self._X_train = np.concatenate(
                (X_norm[perm_norm[n_norm_split:]],
                 X_out[perm_out[:n_out][n_out_split:]]))
            self._y_train = np.append(y_norm[perm_norm[n_norm_split:]],
                                      y_out[perm_out[:n_out][n_out_split:]])
            self._X_val = np.concatenate(
                (X_norm[perm_norm[:n_norm_split]],
                 X_out[perm_out[:n_out][:n_out_split]]))
            self._y_val = np.append(y_norm[perm_norm[:n_norm_split]],
                                    y_out[perm_out[:n_out][:n_out_split]])

            # shuffle data (since batches are extracted block-wise)
            self.n_train = len(self._y_train)
            self.n_val = len(self._y_val)
            perm_train = np.random.permutation(self.n_train)
            perm_val = np.random.permutation(self.n_val)
            self._X_train = self._X_train[perm_train]
            self._y_train = self._y_train[perm_train]
            self._X_val = self._X_val[perm_val]
            self._y_val = self._y_val[perm_val]

            # Subset train set such that we only get batches of the same size
            self.n_train = (self.n_train // Cfg.batch_size) * Cfg.batch_size
            subset = np.random.choice(len(self._X_train),
                                      self.n_train,
                                      replace=False)
            self._X_train = self._X_train[subset]
            self._y_train = self._y_train[subset]

            # Adjust number of batches
            Cfg.n_batches = int(np.ceil(self.n_train * 1. / Cfg.batch_size))

            # test set
            X_norm, X_out, y_norm, y_out, idx_norm, idx_out = extract_norm_and_out(
                X_test, y_test, normal=normal, outlier=outliers)

            yo_norm = y_test[idx_norm]
            yo_out = y_test[idx_out]
            self._yo_test = np.append(yo_norm, yo_out)

            self._X_test = np.concatenate((X_norm, X_out))
            self._y_test = np.append(y_norm, y_out)
            perm_test = np.random.permutation(len(self._y_test))
            self._X_test = self._X_test[perm_test]
            self._y_test = self._y_test[perm_test]
            self._yo_test = self._yo_test[perm_test]
            self.n_test = len(self._y_test)

        else:
            # split into training, validation, and test sets
            np.random.seed(self.seed)
            perm = np.random.permutation(len(X))

            self._X_train = X[perm[self.n_val:]]
            self._y_train = y[perm[self.n_val:]]
            self._X_val = X[perm[:self.n_val]]
            self._y_val = y[perm[:self.n_val]]
            self._X_test = X_test
            self._y_test = y_test

        # normalize data (if original scale should not be preserved)
        if not original_scale:

            # rescale to [0,1] (w.r.t. min and max in train data)
            rescale_to_unit_interval(self._X_train, self._X_val, self._X_test)

            # PCA
            if Cfg.pca:
                self._X_train, self._X_val, self._X_test = pca(
                    self._X_train, self._X_val, self._X_test, 0.95)

        flush_last_line()
        print("Data loaded.")