示例#1
0
    def load_data(self):
        """
        Load data.
        """

        self.test_images = utils.read_hdf5(self.args.test_images_file).astype(
            numpy.float32)
        log('[Attack] read %s' % self.args.test_images_file)

        # For color and gray images.
        if len(self.test_images.shape) < 4:
            self.test_images = numpy.expand_dims(self.test_images, axis=3)

        self.test_codes = utils.read_hdf5(self.args.test_codes_file).astype(
            numpy.int)
        self.test_codes = self.test_codes[:, self.args.label_index]
        log('[Attack] read %s' % self.args.test_codes_file)

        self.test_theta = utils.read_hdf5(self.args.test_theta_file).astype(
            numpy.float32)
        log('[Attack] read %s' % self.args.test_theta_file)

        self.N_class = numpy.max(self.test_codes) + 1
        self.min_bound = numpy.min(self.test_theta, 0)
        self.max_bound = numpy.max(self.test_theta, 0)

        if self.args.max_samples < 0:
            self.args.max_samples = self.test_theta.shape[0]
        else:
            self.args.max_samples = min(self.args.max_samples,
                                        self.test_theta.shape[0])
    def load_data(self):
        """
        Load data.
        """

        test_codes = utils.read_hdf5(self.args.test_codes_file).astype(numpy.int)
        self.test_fonts = test_codes[:, 1]
        self.test_classes = test_codes[:, 2]
        log('[Attack] read %s' % self.args.test_codes_file)

        self.test_theta = utils.read_hdf5(self.args.test_theta_file).astype(numpy.float32)
        log('[Attack] read %s' % self.args.test_theta_file)

        self.min_bound = numpy.min(self.test_theta, 0)
        self.max_bound = numpy.max(self.test_theta, 0)

        if self.args.max_samples < 0:
            self.args.max_samples = self.test_theta.shape[0]
        else:
            self.args.max_samples = min(self.args.max_samples, self.test_theta.shape[0])
示例#3
0
    def compute_statistics(self):
        """
        Compute statistics based on distances.
        """

        # That's the basis for all computation as we only want to consider successful attacks
        # on test samples that were correctly classified.
        raw_overall_success = numpy.logical_and(self.success >= 0, self.accuracy)

        # Important check, for on-manifold attack this will happen if the manifold is small and the model very accurate!
        if not numpy.any(raw_overall_success):
            for n in range(len(self.norms)):
                for type in ['raw_success', 'raw_iteration', 'raw_average', 'raw_image']:
                    self.results[n][type] = 0
                for type in ['raw_class_success', 'raw_class_average', 'raw_class_image']:
                    self.results[n][type] = numpy.zeros((self.N_class))
            if self.args.results_file:
                utils.write_pickle(self.args.results_file, self.results)
                log('[Testing] wrote %s' % self.args.results_file)
            return

        #
        # Compute nearest neighbor statistics in image space.
        #

        if self.args.plot_directory and self.args.plot_manifolds and utils.display():
            log('[Testing] computing nearest neighbor ...')
            nearest_neighbors_indices = self.compute_nearest_neighbors(self.perturbation_images[raw_overall_success])
            pure_perturbations = self.test_images[raw_overall_success] - self.perturbation_images[raw_overall_success]
            pure_perturbations_norm = numpy.linalg.norm(pure_perturbations, ord=2, axis=1)
            for k in range(10):
                direction = self.perturbation_images[raw_overall_success] - self.train_images[nearest_neighbors_indices[:, k]]
                direction_norm = numpy.linalg.norm(direction, ord=2, axis=1)
                dot_products = numpy.einsum('ij,ij->i', direction, pure_perturbations)
                dot_product_norms = numpy.multiply(pure_perturbations_norm, direction_norm)
                dot_products, dot_product_norms = dot_products[dot_product_norms > 10**-8], dot_product_norms[dot_product_norms > 10**-8]
                dot_products /= dot_product_norms
                dot_products = numpy.degrees(numpy.arccos(dot_products))

                # matplotlib's hsitogram plots give weird error if there are NaN values, so simple check:
                if dot_products.shape[0] > 0 and not numpy.any(dot_products != dot_products):
                    plot_file = os.path.join(self.args.plot_directory, 'dot_products_nn%d' % k)
                    plot.histogram(plot_file, dot_products, 100, xmin=numpy.min(dot_products), xmax=numpy.max(dot_products),
                                  title='Dot Products Between Adversarial Perturbations and Direction to Nearest Neighbor %d' % k,
                                  xlabel='Dot Product', ylabel='Count')
                    log('[Testing] wrote %s' % plot_file)

        #
        # We compute some simple statistics:
        # - raw success rate: fraction of successful attack without considering epsilon
        # - corrected success rate: fraction of successful attacks within epsilon-ball
        # - raw average perturbation: average distance to original samples (for successful attacks)
        # - corrected average perturbation: average distance to original samples for perturbations
        #   within epsilon-ball (for successful attacks).
        # These statistics can also be computed per class.
        # And these statistics are computed with respect to three norms.

        if self.args.plot_directory and utils.display():
            iterations = self.success[raw_overall_success]
            x = numpy.arange(numpy.max(iterations) + 1)
            y = numpy.bincount(iterations)
            plot_file = os.path.join(self.args.plot_directory, 'iterations')
            plot.bar(plot_file, x, y,
                    title='Distribution of Iterations of Successful Attacks', xlabel='Number of Iterations', ylabel='Count')
            log('[Testing] wrote %s' % plot_file)

        reference_perturbations = numpy.zeros(self.perturbations.shape)
        if self.args.N_theta > 4:
            reference_perturbations[:, 4] = 1

        for n in range(len(self.norms)):
            norm = self.norms[n]
            delta = numpy.linalg.norm(self.perturbations - reference_perturbations, norm, axis=1)
            image_delta = numpy.linalg.norm(self.test_images - self.perturbation_images, norm, axis=1)

            if self.args.plot_directory and utils.display():
                plot_file = os.path.join(self.args.plot_directory, 'distances_l%g' % norm)
                plot.histogram(plot_file, delta[raw_overall_success], 50, title='Distribution of $L_{%g}$ Distances of Successful Attacks' % norm,
                              xlabel='Distance', ylabel='Count')
                log('[Testing] wrote %s' % plot_file)

            debug_accuracy = numpy.sum(self.accuracy) / self.accuracy.shape[0]
            debug_attack_fraction = numpy.sum(raw_overall_success) / numpy.sum(self.success >= 0)
            debug_test_fraction = numpy.sum(raw_overall_success) / numpy.sum(self.accuracy)
            log('[Testing] attacked mode accuracy: %g' % debug_accuracy)
            log('[Testing] only %g of successful attacks are valid' % debug_attack_fraction)
            log('[Testing] only %g of correct samples are successfully attacked' % debug_test_fraction)

            N_accuracy = numpy.sum(self.accuracy)
            self.results[n]['raw_success'] = numpy.sum(raw_overall_success) / N_accuracy

            self.results[n]['raw_iteration'] = numpy.average(self.success[raw_overall_success])

            self.results[n]['raw_average'] = numpy.average(delta[raw_overall_success]) if numpy.any(raw_overall_success) else 0

            self.results[n]['raw_image'] = numpy.average(image_delta[raw_overall_success]) if numpy.any(raw_overall_success) else 0

            raw_class_success = numpy.zeros((self.N_class, self.perturbation_codes.shape[0]), bool)
            corrected_class_success = numpy.zeros((self.N_class, self.perturbation_codes.shape[0]), bool)

            self.results[n]['raw_class_success'] = numpy.zeros((self.N_class))

            self.results[n]['raw_class_average'] = numpy.zeros((self.N_class))

            self.results[n]['raw_class_image'] = numpy.zeros((self.N_class))

            for c in range(self.N_class):
                N_samples = numpy.sum(self.accuracy[self.perturbation_codes == c].astype(int))
                if N_samples <= 0:
                    continue;

                raw_class_success[c] = numpy.logical_and(raw_overall_success, self.perturbation_codes == c)

                self.results[n]['raw_class_success'][c] = numpy.sum(raw_class_success[c]) / N_samples

                if numpy.any(raw_class_success[c]):
                    self.results[n]['raw_class_average'][c] = numpy.average(delta[raw_class_success[c].astype(bool)])
                if numpy.any(corrected_class_success[c]):
                    self.results[n]['raw_class_image'][c] = numpy.average(image_delta[raw_class_success[c].astype(bool)])

        if self.args.results_file:
            utils.write_pickle(self.args.results_file, self.results)
            log('[Testing] wrote %s' % self.args.results_file)
示例#4
0
    def load_data(self):
        """
        Load data.
        """

        assert self.args.batch_size % 4 == 0

        self.train_images = utils.read_hdf5(
            self.args.train_images_file).astype(numpy.float32)
        log('[Training] read %s' % self.args.train_images_file)

        self.test_images = utils.read_hdf5(self.args.test_images_file).astype(
            numpy.float32)
        log('[Training] read %s' % self.args.test_images_file)

        # For handling both color and gray images.
        if len(self.train_images.shape) < 4:
            self.train_images = numpy.expand_dims(self.train_images, axis=3)
            self.test_images = numpy.expand_dims(self.test_images, axis=3)
            log('[Training] no color images, adjusted size')
        self.resolution = self.test_images.shape[2]
        log('[Training] resolution %d' % self.resolution)

        self.train_codes = utils.read_hdf5(self.args.train_codes_file).astype(
            numpy.int)
        assert self.train_codes.shape[1] >= self.args.label_index + 1
        self.train_codes = self.train_codes[:, self.args.label_index]
        log('[Training] read %s' % self.args.train_codes_file)
        self.N_class = numpy.max(self.train_codes) + 1

        self.test_codes = utils.read_hdf5(self.args.test_codes_file).astype(
            numpy.int)
        assert self.test_codes.shape[1] >= self.args.label_index + 1
        self.test_codes = self.test_codes[:, self.args.label_index]
        log('[Training] read %s' % self.args.test_codes_file)

        self.train_theta = utils.read_hdf5(self.args.train_theta_file).astype(
            numpy.float32)
        log('[Training] read %s' % self.args.train_theta_file)

        assert self.test_images.shape[0] == self.test_codes.shape[0]

        self.min_bound = numpy.min(self.train_theta, axis=0)
        self.max_bound = numpy.max(self.train_theta, axis=0)
        log('[Training] min bound: %s' % ' '.join(
            ['%g' % self.min_bound[i]
             for i in range(self.min_bound.shape[0])]))
        log('[Training] max bound: %s' % ' '.join(
            ['%g' % self.max_bound[i]
             for i in range(self.max_bound.shape[0])]))

        self.test_theta = utils.read_hdf5(self.args.test_theta_file).astype(
            numpy.float32)
        log('[Training] read %s' % self.args.test_theta_file)

        assert self.train_codes.shape[0] == self.train_images.shape[0]
        assert self.test_codes.shape[0] == self.test_images.shape[0]
        assert self.train_theta.shape[
            0] == self.train_images.shape[0], '%s != %s' % ('x'.join(
                list(map(str, self.train_theta.shape))), 'x'.join(
                    list(map(str, self.train_images.shape))))
        assert self.test_theta.shape[0] == self.test_images.shape[0]

        # Select subset of samples
        if self.args.training_samples < 0:
            self.args.training_samples = self.train_images.shape[0]
        else:
            self.args.training_samples = min(self.args.training_samples,
                                             self.train_images.shape[0])
        log('[Training] using %d training samples' %
            self.args.training_samples)

        if self.args.test_samples < 0:
            self.args.test_samples = self.test_images.shape[0]
        else:
            self.args.test_samples = min(self.args.test_samples,
                                         self.test_images.shape[0])

        if self.args.early_stopping:
            assert self.args.validation_samples > 0
            assert self.args.training_samples + self.args.validation_samples <= self.train_images.shape[
                0]
            self.val_images = self.train_images[self.train_images.shape[0] -
                                                self.args.validation_samples:]
            self.val_codes = self.train_codes[self.train_codes.shape[0] -
                                              self.args.validation_samples:]
            self.train_images = self.train_images[:self.train_images.shape[0] -
                                                  self.args.validation_samples]
            self.train_codes = self.train_codeſ[:self.train_codes.shape[0] -
                                                self.args.validation_samples]
            assert self.val_images.shape[
                0] == self.args.validation_samples and self.val_codes.shape[
                    0] == self.args.validation_samples

        if self.args.random_samples:
            perm = numpy.random.permutation(self.train_images.shape[0] // 10)
            perm = perm[:self.args.training_samples // 10]
            perm = numpy.repeat(perm, self.N_class, axis=0) * 10 + numpy.tile(
                numpy.array(range(self.N_class)), (perm.shape[0]))
            self.train_images = self.train_images[perm]
            self.train_codes = self.train_codes[perm]
            self.train_theta = self.train_theta[perm]
        else:
            self.train_images = self.train_images[:self.args.training_samples]
            self.train_codes = self.train_codes[:self.args.training_samples]
            self.train_theta = self.train_theta[:self.args.training_samples]

        self.train_valid = (numpy.max(numpy.abs(self.train_theta), axis=1) <=
                            self.args.bound).astype(int)
        self.test_valid = (numpy.max(numpy.abs(self.test_theta), axis=1) <=
                           self.args.bound).astype(int)

        # Check that the dataset is balanced.
        number_samples = self.train_codes.shape[0] // self.N_class
        for c in range(self.N_class):
            number_samples_ = numpy.sum(self.train_codes == c)
            if number_samples_ != number_samples:
                log(
                    '[Training] dataset not balanced, class %d should have %d samples but has %d'
                    % (c, number_samples, number_samples_), LogLevel.WARNING)
    def load_data(self):
        """
        Load data.
        """

        assert self.args.batch_size % 4 == 0

        self.database = utils.read_hdf5(self.args.database_file).astype(
            numpy.float32)
        log('[Training] read %s' % self.args.database_file)

        self.N_font = self.database.shape[0]
        self.N_class = self.database.shape[1]

        self.database = self.database.reshape(
            (self.database.shape[0] * self.database.shape[1],
             self.database.shape[2], self.database.shape[3]))
        self.database = torch.from_numpy(self.database)
        if self.args.use_gpu:
            self.database = self.database.cuda()
            self.database = torch.autograd.Variable(self.database, False)

        self.train_images = utils.read_hdf5(
            self.args.train_images_file).astype(numpy.float32)
        log('[Training] read %s' % self.args.train_images_file)

        self.test_images = utils.read_hdf5(self.args.test_images_file).astype(
            numpy.float32)
        log('[Training] read %s' % self.args.test_images_file)

        # For handling both color and gray images.
        if len(self.train_images.shape) < 4:
            self.train_images = numpy.expand_dims(self.train_images, axis=3)
            self.test_images = numpy.expand_dims(self.test_images, axis=3)
            log('[Training] no color images, adjusted size')
        self.resolution = self.train_images.shape[2]
        log('[Training] resolution %d' % self.resolution)

        self.train_codes = utils.read_hdf5(self.args.train_codes_file).astype(
            numpy.int)
        assert self.train_codes.shape[1] == 3
        log('[Training] read %s' % self.args.train_codes_file)

        self.test_codes = utils.read_hdf5(self.args.test_codes_file).astype(
            numpy.int)
        assert self.test_codes.shape[1] == 3
        log('[Training] read %s' % self.args.test_codes_file)

        self.train_theta = utils.read_hdf5(self.args.train_theta_file).astype(
            numpy.float32)
        log('[Training] read %s' % self.args.train_theta_file)

        self.min_bound = numpy.min(self.train_theta, axis=0)
        self.max_bound = numpy.max(self.train_theta, axis=0)

        self.test_theta = utils.read_hdf5(self.args.test_theta_file).astype(
            numpy.float32)
        log('[Training] read %s' % self.args.test_theta_file)

        assert self.train_codes.shape[0] == self.train_images.shape[0]
        assert self.test_codes.shape[0] == self.test_images.shape[0]
        assert self.train_theta.shape[0] == self.train_images.shape[0]
        assert self.test_theta.shape[0] == self.test_images.shape[0]

        # Select subset of samples
        if self.args.training_samples < 0:
            self.args.training_samples = self.train_images.shape[0]
        else:
            self.args.training_samples = min(self.args.training_samples,
                                             self.train_images.shape[0])
        log('[Training] found %d classes' % self.N_class)
        log('[Training] using %d training samples' %
            self.args.training_samples)

        if self.args.test_samples < 0:
            self.args.test_samples = self.test_images.shape[0]
        else:
            self.args.test_samples = min(self.args.test_samples,
                                         self.test_images.shape[0])

        if self.args.early_stopping:
            assert self.args.validation_samples > 0
            assert self.args.training_samples + self.args.validation_samples <= self.train_images.shape[
                0]
            self.val_images = self.train_images[self.train_images.shape[0] -
                                                self.args.validation_samples:]
            self.val_codes = self.train_codes[self.train_codes.shape[0] -
                                              self.args.validation_samples:,
                                              self.args.label_index]
            self.train_images = self.train_images[:self.train_images.shape[0] -
                                                  self.args.validation_samples]
            self.train_codes = self.train_codes[:self.train_codes.shape[0] -
                                                self.args.validation_samples]
            assert self.val_images.shape[
                0] == self.args.validation_samples and self.val_codes.shape[
                    0] == self.args.validation_samples

        if self.args.random_samples:
            perm = numpy.random.permutation(self.train_images.shape[0] // 10)
            perm = perm[:self.args.training_samples // 10]
            perm = numpy.repeat(perm, self.N_class, axis=0) * 10 + numpy.tile(
                numpy.array(range(self.N_class)), (perm.shape[0]))
            self.train_images = self.train_images[perm]
            self.train_codes = self.train_codes[perm]
            self.train_theta = self.train_theta[perm]
        else:
            self.train_images = self.train_images[:self.args.training_samples]
            self.train_codes = self.train_codes[:self.args.training_samples]
            self.train_theta = self.train_theta[:self.args.training_samples]

        # Check that the dataset is balanced.
        number_samples = self.train_codes.shape[0] // self.N_class
        for c in range(self.N_class):
            number_samples_ = numpy.sum(
                self.train_codes[:, self.args.label_index] == c)
            if number_samples_ != number_samples:
                log(
                    '[Training] dataset not balanced, class %d should have %d samples but has %d'
                    % (c, number_samples, number_samples_), LogLevel.WARNING)