def compute_normalized_ppca(self):
        """
        Compute PPCA on L2-normalized data.

        Fits PPCA with ``self.args.n_pca`` components on the first ``self.args.n_fit``
        nearest neighbor images (flattened, every row scaled to unit L2 norm) and
        reconstructs the normalized test images, perturbed images and pure
        perturbations (perturbed image minus test image) with it. Only samples with
        ``self.success >= 0`` that are flagged in ``self.accuracy`` and whose pure
        perturbation has an L2 norm above 1e-4 are considered.

        Results are stored under the keys ``'test'``, ``'perturbation'`` and ``'true'``:

        - ``self.distances``: per-sample mean squared reconstruction error;
        - ``self.angles``: output of ``common.numpy.angles`` for each sample and its
          reconstruction, converted from radians to degrees.

        The arrays stored on ``self`` are not modified.
        """

        def flatten(array):
            # One row per sample.
            return array.reshape(array.shape[0], -1)

        def normalize_rows(array):
            # Deliberately out-of-place: reshape and slicing may return views, so an
            # in-place division would write the normalized values back into the arrays
            # stored on self. Broadcasting the column of norms replaces numpy.repeat.
            norms = numpy.linalg.norm(array, ord=2, axis=1)
            return array / norms.reshape(-1, 1)

        nearest_neighbor_images = flatten(self.nearest_neighbor_images)[:self.args.n_fit]
        perturbations = flatten(self.perturbations)
        test_images = flatten(self.test_images)
        pure_perturbations = perturbations - test_images

        # (Nearly) zero perturbations cannot be normalized, so they are excluded as well.
        pure_perturbations_norms = numpy.linalg.norm(pure_perturbations, ord=2, axis=1)
        success = numpy.logical_and(numpy.logical_and(self.success >= 0, self.accuracy), pure_perturbations_norms > 1e-4)
        log('[Detection] %d valid attacked samples' % numpy.sum(success))

        nearest_neighbor_images = normalize_rows(nearest_neighbor_images)
        perturbations = normalize_rows(perturbations[success])
        test_images = normalize_rows(test_images[success])
        pure_perturbations = normalize_rows(pure_perturbations[success])

        # NaN is the only value that differs from itself; a zero-norm row would produce it.
        assert not numpy.any(nearest_neighbor_images != nearest_neighbor_images)
        assert not numpy.any(perturbations != perturbations)
        assert not numpy.any(test_images != test_images)
        assert not numpy.any(pure_perturbations != pure_perturbations)

        ppca = PPCA(n_components=self.args.n_pca)
        ppca.fit(nearest_neighbor_images)
        log('[Experiment] computed PPCA on nearest neighbor images')

        reconstructed_test_images = ppca.inverse_transform(ppca.transform(test_images))
        reconstructed_perturbations = ppca.inverse_transform(ppca.transform(perturbations))
        reconstructed_pure_perturbations = ppca.inverse_transform(ppca.transform(pure_perturbations))

        # Marginal probabilities are currently disabled:
        #self.probabilities['test'] = ppca.marginal(test_images)
        #self.probabilities['perturbation'] = ppca.marginal(perturbations)
        #self.probabilities['true'] = ppca.marginal(pure_perturbations)

        def mean_squared_error(reconstruction, original):
            residual = reconstruction - original
            return numpy.average(numpy.multiply(residual, residual), axis=1)

        self.distances['test'] = mean_squared_error(reconstructed_test_images, test_images)
        self.distances['perturbation'] = mean_squared_error(reconstructed_perturbations, perturbations)
        self.distances['true'] = mean_squared_error(reconstructed_pure_perturbations, pure_perturbations)

        # Samples are passed as columns, hence the transposes.
        self.angles['test'] = numpy.rad2deg(common.numpy.angles(test_images.T, reconstructed_test_images.T))
        self.angles['perturbation'] = numpy.rad2deg(common.numpy.angles(reconstructed_perturbations.T, perturbations.T))
        self.angles['true'] = numpy.rad2deg(common.numpy.angles(reconstructed_pure_perturbations.T, pure_perturbations.T))
Пример #2
0
    def load_data(self):
        """
        Read all inputs of the experiment from the HDF5 files named in ``self.args``.

        Populates ``self.test_images``, ``self.test_codes``, ``self.perturbations``,
        ``self.perturbation_images``, ``self.perturbation_codes``, ``self.success`` and
        ``self.probabilities``.
        """

        args = self.args

        images = utils.read_hdf5(args.test_images_file).astype(numpy.float32)
        log('[Testing] read %s' % args.test_images_file)
        # Arrays with fewer than four dimensions get a trailing axis appended.
        if images.ndim < 4:
            images = numpy.expand_dims(images, axis=3)
        self.test_images = images

        codes = utils.read_hdf5(args.test_codes_file)
        # Keep only the column selected by label_index.
        self.test_codes = codes[:, args.label_index]
        log('[Testing] read %s' % args.test_codes_file)

        perturbations = utils.read_hdf5(args.perturbations_file)
        if len(perturbations.shape) > 3:
            # Collapse everything behind the first two axes into a single axis.
            flat_shape = (perturbations.shape[0], perturbations.shape[1], -1)
            perturbations = perturbations.reshape(flat_shape)
        self.perturbations = perturbations

        # The second axis of the perturbations determines how many test samples are used.
        num_samples = perturbations.shape[1]
        self.perturbation_images = self.test_images[:num_samples].reshape(num_samples, -1)
        self.perturbation_codes = self.test_codes[:num_samples]
        log('[Testing] read %s' % args.perturbations_file)
        # NaN is the only value that is not equal to itself.
        has_nan = numpy.any(self.perturbations != self.perturbations)
        assert not has_nan, 'NaN in perturbations'

        self.success = utils.read_hdf5(args.success_file)
        log('[Testing] read %s' % args.success_file)

        self.probabilities = utils.read_hdf5(args.probabilities_file)
        log('[Testing] read %s' % args.probabilities_file)
Пример #3
0
    def load_data(self):
        """
        Load data and model.

        Reads test/train images, test codes, accuracy, perturbations and success
        indicators from the HDF5 files named in ``self.args`` and sets up an STN decoder
        with ``self.args.N_theta`` parameters as ``self.model``.

        Perturbations and success indicators are read with the attempts on the first and
        the samples on the second axis. They are flattened so that all attempts of one
        sample are consecutive; test images, codes and accuracy are repeated
        ``self.N_attempts`` times per sample to line up with that layout.
        """

        self.test_images = utils.read_hdf5(self.args.test_images_file).astype(numpy.float32)
        log('[Testing] read %s' % self.args.test_images_file)

        # For handling both color and gray images.
        if len(self.test_images.shape) < 4:
            self.test_images = numpy.expand_dims(self.test_images, axis=3)
            log('[Testing] no color images, adjusted size')
        self.resolution = self.test_images.shape[2]
        log('[Testing] resolution %d' % self.resolution)

        self.train_images = utils.read_hdf5(self.args.train_images_file).astype(numpy.float32)
        # Note that the training images are kept flattened, one row per image!
        self.train_images = self.train_images.reshape((self.train_images.shape[0], -1))
        log('[Testing] read %s' % self.args.train_images_file)

        # numpy.int was only an alias of the builtin int and has been removed from NumPy,
        # so the builtin is used directly.
        self.test_codes = utils.read_hdf5(self.args.test_codes_file).astype(int)
        self.test_codes = self.test_codes[:, self.args.label_index]
        # Labels are assumed to start at zero.
        self.N_class = numpy.max(self.test_codes) + 1
        log('[Testing] read %s' % self.args.test_codes_file)

        self.accuracy = utils.read_hdf5(self.args.accuracy_file)
        log('[Testing] read %s' % self.args.accuracy_file)

        self.perturbations = utils.read_hdf5(self.args.perturbations_file).astype(numpy.float32)
        self.N_attempts = self.perturbations.shape[0]
        N_samples = self.perturbations.shape[1]

        # First, repeat relevant data.
        self.test_images = numpy.repeat(self.test_images[:N_samples], self.N_attempts, axis=0)
        self.perturbation_codes = numpy.repeat(self.test_codes[:N_samples], self.N_attempts, axis=0)
        self.perturbation_codes = numpy.squeeze(self.perturbation_codes)
        self.accuracy = numpy.repeat(self.accuracy[:N_samples], self.N_attempts, axis=0)

        # Then, reshape the perturbations!
        self.perturbations = numpy.swapaxes(self.perturbations, 0, 1)
        self.perturbations = self.perturbations.reshape((self.perturbations.shape[0] * self.perturbations.shape[1], -1))
        assert self.perturbations.shape[1] == self.args.N_theta
        log('[Testing] read %s' % self.args.perturbations_file)
        # NaN is the only value that is not equal to itself.
        assert not numpy.any(self.perturbations != self.perturbations), 'NaN in perturbations'

        self.success = utils.read_hdf5(self.args.success_file)
        self.success = numpy.swapaxes(self.success, 0, 1)
        self.success = self.success.reshape((self.success.shape[0] * self.success.shape[1]))
        log('[Testing] read %s' % self.args.success_file)

        log('[Testing] using %d input channels' % self.test_images.shape[3])
        assert self.args.N_theta > 0 and self.args.N_theta <= 9
        decoder = models.STNDecoder(self.args.N_theta)
        # decoder.eval()
        log('[Testing] set up STN decoder')

        self.model = decoder
Пример #4
0
    def compute_statistics(self):
        """
        Compute statistics based on distances.

        ``self.success`` and ``self.probabilities`` are flattened so that all attempts of
        one sample are consecutive; ``self.test_probabilities`` is repeated per attempt to
        line up with them. An attack counts as valid if its success entry is
        non-negative. The following entries of ``self.results`` are filled:

        - ``raw_roc``: ROC AUC of the confidences, with attacked samples labeled 0 and
          the corresponding test samples labeled 1;
        - ``raw_confidence_weighted_success``: summed confidence of valid attacks divided
          by the summed confidence on the test samples;
        - ``raw_confidence``: mean confidence of valid attacks;
        - ``raw_ratios``: mean ratio of attack to test confidence over valid attacks;
        - ``raw_success``: fraction of valid attacks;
        - ``raw_iteration``: average success entry of valid attacks.

        If there is no valid attack, all of these (and nothing else) are set to 0.
        Plots are written if ``self.args.plot_directory`` is set and ``utils.display()``
        is truthy; the results are pickled to ``self.args.results_file`` if it is set.
        """

        num_attempts = self.perturbations.shape[0]

        # Swap the first two axes before flattening so that all attempts of a sample
        # end up next to each other.
        success = numpy.swapaxes(self.success, 0, 1)
        success = success.reshape((success.shape[0] * success.shape[1]))

        probabilities = numpy.swapaxes(self.probabilities, 0, 1)
        probabilities = probabilities.reshape(
            (probabilities.shape[0] * probabilities.shape[1], -1))
        # Confidence is the largest probability of each row.
        confidences = numpy.max(probabilities, 1)

        perturbation_probabilities = self.test_probabilities[:self.success.
                                                             shape[1]]
        perturbation_probabilities = numpy.repeat(perturbation_probabilities,
                                                  num_attempts,
                                                  axis=0)
        perturbation_confidences = numpy.max(perturbation_probabilities, 1)

        probability_ratios = confidences / perturbation_confidences

        raw_overall_success = success >= 0
        log('[Testing] %d valid attacks' % numpy.sum(raw_overall_success))

        # For off-manifold attacks this should not happen, but better safe than sorry.
        if not numpy.any(raw_overall_success):
            for key in [
                    'raw_success', 'raw_iteration', 'raw_roc',
                    'raw_confidence_weighted_success', 'raw_confidence',
                    'raw_ratios'
            ]:
                self.results[key] = 0
            if self.args.results_file:
                utils.write_pickle(self.args.results_file, self.results)
                log('[Testing] wrote %s' % self.args.results_file)
            log('[Testing] no successful attacks found, no plots')
            return

        if self.args.plot_directory and utils.display():
            iterations = success[raw_overall_success]
            # bincount returns max + 1 bins, matching x.
            x = numpy.arange(numpy.max(iterations) + 1)
            y = numpy.bincount(iterations)
            plot_file = os.path.join(self.args.plot_directory, 'iterations')
            plot.bar(plot_file,
                     x,
                     y,
                     title='Distribution of Iterations of Successful Attacks',
                     xlabel='Number of Iterations',
                     ylabel='Count')
            log('[Testing] wrote %s' % plot_file)

            plot_file = os.path.join(self.args.plot_directory, 'probabilities')
            plot.histogram(plot_file, confidences[raw_overall_success], 50)
            log('[Testing] wrote %s' % plot_file)

            plot_file = os.path.join(self.args.plot_directory,
                                     'probability_ratios')
            plot.histogram(plot_file, probability_ratios, 50)
            log('[Testing] wrote %s' % plot_file)

            # Probability assigned to the label of each test sample.
            plot_file = os.path.join(self.args.plot_directory,
                                     'test_probabilities')
            plot.histogram(
                plot_file, self.test_probabilities[
                    numpy.arange(self.test_probabilities.shape[0]),
                    self.test_codes], 50)
            log('[Testing] wrote %s' % plot_file)

        y_true = numpy.concatenate(
            (numpy.zeros(confidences.shape[0]),
             numpy.ones(perturbation_confidences.shape[0])))
        y_score = numpy.concatenate((confidences, perturbation_confidences))
        roc_auc_score = sklearn.metrics.roc_auc_score(y_true, y_score)

        self.results['raw_roc'] = roc_auc_score
        self.results['raw_confidence_weighted_success'] = numpy.sum(
            confidences[raw_overall_success]) / numpy.sum(
                perturbation_confidences)
        # Average the per-attack confidences; averaging the full probability rows
        # would mix in the probabilities of all other classes as well.
        self.results['raw_confidence'] = numpy.mean(
            confidences[raw_overall_success])
        self.results['raw_ratios'] = numpy.mean(
            probability_ratios[raw_overall_success])
        self.results['raw_success'] = numpy.sum(
            raw_overall_success) / success.shape[0]
        self.results['raw_iteration'] = numpy.average(
            success[raw_overall_success])

        if self.args.results_file:
            utils.write_pickle(self.args.results_file, self.results)
            log('[Testing] wrote %s' % self.args.results_file)
Пример #5
0
    def compute_statistics(self):
        """
        Compute statistics based on distances.

        Only attacks that succeeded (``self.success >= 0``) on samples flagged in
        ``self.accuracy`` are considered valid. For every norm in ``self.norms`` the
        following entries of ``self.results[n]`` are filled:

        - ``raw_success``: number of valid attacks divided by the number of samples
          flagged in ``self.accuracy``;
        - ``raw_iteration``: average of ``self.success`` over valid attacks;
        - ``raw_average``: average norm of the difference between perturbations and the
          reference perturbations, over valid attacks;
        - ``raw_image``: average norm of the difference between ``self.test_images`` and
          ``self.perturbation_images``, over valid attacks;
        - ``raw_class_success``, ``raw_class_average``, ``raw_class_image``: the same
          statistics per class, as arrays of length ``self.N_class``.

        If there is no valid attack, all of these are set to zero. Plots are written if
        ``self.args.plot_directory`` is set and ``utils.display()`` is truthy; the
        results are pickled to ``self.args.results_file`` if it is set.
        """

        # That's the basis for all computation as we only want to consider successful attacks
        # on test samples that were correctly classified.
        raw_overall_success = numpy.logical_and(self.success >= 0, self.accuracy)

        # Important check, for on-manifold attack this will happen if the manifold is small and the model very accurate!
        if not numpy.any(raw_overall_success):
            for n in range(len(self.norms)):
                for key in ['raw_success', 'raw_iteration', 'raw_average', 'raw_image']:
                    self.results[n][key] = 0
                for key in ['raw_class_success', 'raw_class_average', 'raw_class_image']:
                    self.results[n][key] = numpy.zeros((self.N_class))
            if self.args.results_file:
                utils.write_pickle(self.args.results_file, self.results)
                log('[Testing] wrote %s' % self.args.results_file)
            return

        #
        # Compute nearest neighbor statistics in image space.
        #

        if self.args.plot_directory and self.args.plot_manifolds and utils.display():
            log('[Testing] computing nearest neighbor ...')
            nearest_neighbors_indices = self.compute_nearest_neighbors(self.perturbation_images[raw_overall_success])
            pure_perturbations = self.test_images[raw_overall_success] - self.perturbation_images[raw_overall_success]
            pure_perturbations_norm = numpy.linalg.norm(pure_perturbations, ord=2, axis=1)
            for k in range(10):
                direction = self.perturbation_images[raw_overall_success] - self.train_images[nearest_neighbors_indices[:, k]]
                direction_norm = numpy.linalg.norm(direction, ord=2, axis=1)
                # Row-wise dot products.
                dot_products = numpy.einsum('ij,ij->i', direction, pure_perturbations)
                dot_product_norms = numpy.multiply(pure_perturbations_norm, direction_norm)
                # Skip pairs where one of the vectors is (nearly) zero, the angle is undefined there.
                valid = dot_product_norms > 10**-8
                dot_products, dot_product_norms = dot_products[valid], dot_product_norms[valid]
                dot_products /= dot_product_norms
                # Rounding can push a cosine marginally outside [-1, 1], for which arccos returns NaN.
                dot_products = numpy.degrees(numpy.arccos(numpy.clip(dot_products, -1, 1)))

                # matplotlib's histogram plots give weird error if there are NaN values, so simple check:
                if dot_products.shape[0] > 0 and not numpy.any(dot_products != dot_products):
                    plot_file = os.path.join(self.args.plot_directory, 'dot_products_nn%d' % k)
                    plot.histogram(plot_file, dot_products, 100, xmin=numpy.min(dot_products), xmax=numpy.max(dot_products),
                                  title='Dot Products Between Adversarial Perturbations and Direction to Nearest Neighbor %d' % k,
                                  xlabel='Dot Product', ylabel='Count')
                    log('[Testing] wrote %s' % plot_file)

        #
        # We compute some simple statistics:
        # - raw success rate: fraction of successful attacks without considering epsilon
        # - raw average perturbation: average distance to original samples (for successful attacks)
        # These statistics are also computed per class and with respect to every norm in self.norms.
        #

        if self.args.plot_directory and utils.display():
            iterations = self.success[raw_overall_success]
            # bincount returns max + 1 bins, matching x.
            x = numpy.arange(numpy.max(iterations) + 1)
            y = numpy.bincount(iterations)
            plot_file = os.path.join(self.args.plot_directory, 'iterations')
            plot.bar(plot_file, x, y,
                    title='Distribution of Iterations of Successful Attacks', xlabel='Number of Iterations', ylabel='Count')
            log('[Testing] wrote %s' % plot_file)

        # Perturbations are measured relative to a reference which is zero everywhere,
        # except for a one at index 4 if there are more than four parameters.
        reference_perturbations = numpy.zeros(self.perturbations.shape)
        if self.args.N_theta > 4:
            reference_perturbations[:, 4] = 1

        # These do not depend on the norm, so compute them once.
        N_accuracy = numpy.sum(self.accuracy)
        N_valid = numpy.sum(raw_overall_success)
        debug_accuracy = N_accuracy / self.accuracy.shape[0]
        debug_attack_fraction = N_valid / numpy.sum(self.success >= 0)
        debug_test_fraction = N_valid / N_accuracy

        for n, norm in enumerate(self.norms):
            delta = numpy.linalg.norm(self.perturbations - reference_perturbations, norm, axis=1)
            image_delta = numpy.linalg.norm(self.test_images - self.perturbation_images, norm, axis=1)

            if self.args.plot_directory and utils.display():
                plot_file = os.path.join(self.args.plot_directory, 'distances_l%g' % norm)
                plot.histogram(plot_file, delta[raw_overall_success], 50, title='Distribution of $L_{%g}$ Distances of Successful Attacks' % norm,
                              xlabel='Distance', ylabel='Count')
                log('[Testing] wrote %s' % plot_file)

            log('[Testing] attacked mode accuracy: %g' % debug_accuracy)
            log('[Testing] only %g of successful attacks are valid' % debug_attack_fraction)
            log('[Testing] only %g of correct samples are successfully attacked' % debug_test_fraction)

            self.results[n]['raw_success'] = N_valid / N_accuracy
            self.results[n]['raw_iteration'] = numpy.average(self.success[raw_overall_success])
            self.results[n]['raw_average'] = numpy.average(delta[raw_overall_success]) if numpy.any(raw_overall_success) else 0
            self.results[n]['raw_image'] = numpy.average(image_delta[raw_overall_success]) if numpy.any(raw_overall_success) else 0

            self.results[n]['raw_class_success'] = numpy.zeros((self.N_class))
            self.results[n]['raw_class_average'] = numpy.zeros((self.N_class))
            self.results[n]['raw_class_image'] = numpy.zeros((self.N_class))

            for c in range(self.N_class):
                is_class = self.perturbation_codes == c
                N_samples = numpy.sum(self.accuracy[is_class].astype(int))
                if N_samples <= 0:
                    # No sample of this class is flagged in self.accuracy; keep the zeros.
                    continue

                raw_class_success = numpy.logical_and(raw_overall_success, is_class)
                self.results[n]['raw_class_success'][c] = numpy.sum(raw_class_success) / N_samples

                # Both averages are taken over the valid attacks of this class; averaging
                # over an empty selection would give NaN, so the zeros are kept instead.
                if numpy.any(raw_class_success):
                    self.results[n]['raw_class_average'][c] = numpy.average(delta[raw_class_success])
                    self.results[n]['raw_class_image'][c] = numpy.average(image_delta[raw_class_success])

        if self.args.results_file:
            utils.write_pickle(self.args.results_file, self.results)
            log('[Testing] wrote %s' % self.args.results_file)
    def attack(self):
        """
        Attack the test samples and store perturbations and success indicators.

        For every batch of samples, ``self.args.max_attempts`` attacks are run; results
        go to ``self.perturbations`` (attempts x samples x theta dimensions, holding the
        perturbed input, not just the perturbation) and ``self.success`` (attempts x
        samples, -1 where nothing was recorded). Both are written to
        ``self.args.perturbations_file`` and ``self.args.success_file``.

        If both files already exist, they are read first:

        - if they hold at least ``max_attempts`` attempts and ``max_samples`` samples,
          nothing is computed and the method returns;
        - if the number of attempts matches, only the missing samples are attacked and
          appended along the sample axis;
        - if the number of samples matches, only the missing attempts are run and
          appended along the attempt axis;
        - otherwise everything is recomputed and the files are overwritten.

        Note that resuming modifies ``self.args.max_samples`` or
        ``self.args.max_attempts`` as well as ``self.test_theta``, ``self.test_fonts``
        and ``self.test_classes`` in place.
        """

        assert self.model is not None
        assert self.model.classifier.training is False

        concatenate_axis = -1
        if os.path.exists(self.args.perturbations_file) and os.path.exists(self.args.success_file):
            self.original_perturbations = utils.read_hdf5(self.args.perturbations_file)
            assert len(self.original_perturbations.shape) == 3, self.original_perturbations.shape
            log('[Attack] read %s' % self.args.perturbations_file)

            self.original_success = utils.read_hdf5(self.args.success_file)
            log('[Attack] read %s' % self.args.success_file)

            assert self.original_perturbations.shape[0] == self.original_success.shape[0]
            assert self.original_perturbations.shape[1] == self.original_success.shape[1]
            assert self.original_perturbations.shape[2] == self.test_theta.shape[1]

            found_attempts = self.original_perturbations.shape[0]
            found_samples = self.original_perturbations.shape[1]

            # Nothing to do only if the stored results already cover everything requested.
            if found_samples >= self.args.max_samples and found_attempts >= self.args.max_attempts:
                log('[Attack] found %d attempts, %d samples, requested no more' % (found_attempts, found_samples))
                return
            elif found_attempts == self.args.max_attempts or found_samples == self.args.max_samples:
                if found_attempts == self.args.max_attempts:
                    # Skip the samples that were attacked already.
                    self.test_theta = self.test_theta[found_samples:]
                    self.test_fonts = self.test_fonts[found_samples:]
                    self.test_classes = self.test_classes[found_samples:]
                    self.args.max_samples = self.args.max_samples - found_samples
                    concatenate_axis = 1
                    log('[Attack] found %d attempts with %d perturbations, computing %d more perturbations' % (
                    found_attempts, found_samples, self.args.max_samples))
                elif found_samples == self.args.max_samples:
                    self.args.max_attempts = self.args.max_attempts - found_attempts
                    concatenate_axis = 0
                    log('[Attack] found %d attempts with %d perturbations, computing %d more attempts' % (
                    found_attempts, found_samples, self.args.max_attempts))

        self.perturbations = numpy.zeros((self.args.max_attempts, self.args.max_samples, self.test_theta.shape[1]))
        self.success = numpy.ones((self.args.max_attempts, self.args.max_samples), dtype=int) * -1

        # Only attacks with 'Batch' in their name process several samples at once.
        if self.args.attack.find('Batch') >= 0:
            batch_size = min(self.args.batch_size, self.args.max_samples)
        else:
            batch_size = 1

        objective = self.objective_class()
        num_batches = int(math.ceil(self.args.max_samples/batch_size))

        for i in range(num_batches):
            if i*batch_size == self.args.max_samples:
                break

            i_start = i * batch_size
            i_end = min((i + 1) * batch_size, self.args.max_samples)

            batch_fonts = self.test_fonts[i_start: i_end]
            batch_classes = self.test_classes[i_start: i_end]
            batch_code = numpy.concatenate((common.numpy.one_hot(batch_fonts, self.N_font), common.numpy.one_hot(batch_classes, self.N_class)), axis=1).astype(numpy.float32)

            batch_classes = common.torch.as_variable(batch_classes, self.args.use_gpu)
            batch_inputs = common.torch.as_variable(self.test_theta[i_start: i_end], self.args.use_gpu)
            batch_code = common.torch.as_variable(batch_code, self.args.use_gpu)

            # This basically allows to only optimize over theta, keeping the font/class code fixed.
            self.model.decoder.set_code(batch_code)

            for t in range(self.args.max_attempts):
                attack = self.setup_attack(batch_inputs, batch_classes)
                success, perturbations, _, _, _ = attack.run(objective)
                # NaN is the only value that is not equal to itself.
                assert not numpy.any(perturbations != perturbations), perturbations

                # Note that we save the perturbed image, not only the perturbation!
                perturbations = perturbations.reshape(batch_inputs.size())  # hack for when only one dimensional latent space is used!
                self.perturbations[t][i_start: i_end] = perturbations + batch_inputs.cpu().numpy()
                self.success[t][i_start: i_end] = success

            log('[Attack] %d: completed' % i)

        if concatenate_axis >= 0:
            if self.perturbations.shape[0] == self.args.max_attempts:
                self.perturbations = numpy.concatenate((self.original_perturbations, self.perturbations), axis=concatenate_axis)
                self.success = numpy.concatenate((self.original_success, self.success), axis=concatenate_axis)
                log('[Attack] concatenated')

        utils.write_hdf5(self.args.perturbations_file, self.perturbations)
        log('[Attack] wrote %s' % self.args.perturbations_file)
        utils.write_hdf5(self.args.success_file, self.success)
        log('[Attack] wrote %s' % self.args.success_file)
    def visualize_perturbations(self):
        """
        Visualize perturbations.

        Goes through at most the first 1000 samples and, for every sample flagged in
        ``self.accuracy`` with at least one successful attempt (``self.success >= 0``),
        writes three images per attempt (at most 6 attempts) to
        ``self.args.output_directory``: the original image, the attacked image and the
        perturbation scaled by its largest absolute value. The file names contain the
        sample index, the attempt index and the predicted label or, for the
        perturbation, the scaling factor. Stops once more than 200 samples have been
        visualized.
        """

        num_attempts = min(self.perturbations.shape[1], 6)
        utils.makedir(self.args.output_directory)

        count = 0
        for i in range(min(1000, self.perturbations.shape[0])):

            log('[Visualization] sample %d, iterations %s and correctly classified: %s'
                % (i + 1, ' '.join(list(map(
                    str, self.success[i]))), self.accuracy[i]))
            if not numpy.any(self.success[i] >= 0) or not self.accuracy[i]:
                continue
            elif count > 200:
                break

            image = self.test_images[i]
            image_label = numpy.argmax(self.theta_representations[i])

            for j in range(num_attempts):
                image_attack = self.perturbation_images[i][j]
                image_perturbation = image_attack - image

                # Scale the perturbation to [-1, 1]. An all-zero perturbation is left
                # as is, dividing by zero would turn it into NaN.
                max_image_perturbation = numpy.max(
                    numpy.abs(image_perturbation))
                if max_image_perturbation > 0:
                    image_perturbation /= max_image_perturbation

                attack_label = numpy.argmax(
                    self.perturbation_representations[i][j])

                image_file = os.path.join(
                    self.args.output_directory,
                    '%d_%d_image_%d.png' % (i, j, image_label))
                attack_file = os.path.join(
                    self.args.output_directory,
                    '%d_%d_attack_%d.png' % (i, j, attack_label))
                perturbation_file = os.path.join(
                    self.args.output_directory, '%d_%d_perturbation_%g.png' %
                    (i, j, max_image_perturbation))

                vis.image(image_file, image, scale=10)
                vis.image(attack_file, image_attack, scale=10)
                vis.perturbation(perturbation_file,
                                 image_perturbation,
                                 scale=10)

            count += 1
Пример #8
0
    def attack(self):
        """
        Run the attack on the test images and store the adversarial examples.

        If both the perturbations file and the success file already exist, they
        are read first: either nothing is left to compute, or only the missing
        samples/attempts are computed and concatenated to the stored results
        before everything is written back.
        """

        assert self.model is not None
        assert self.model.training is False
        assert self.test_images.shape[0] == self.test_codes.shape[0], 'number of samples has to match'

        args = self.args

        # -1 means "nothing to concatenate"; 0 appends attempts, 1 appends samples.
        concatenate_axis = -1
        files_exist = os.path.exists(args.perturbations_file) and os.path.exists(args.success_file)
        if files_exist:
            self.original_perturbations = utils.read_hdf5(args.perturbations_file)
            # Multi-channel images are stored with an explicit channel axis.
            expected_rank = 5 if self.test_images.shape[3] > 1 else 4
            assert len(self.original_perturbations.shape) == expected_rank
            log('[Attack] read %s' % args.perturbations_file)

            self.original_success = utils.read_hdf5(args.success_file)
            log('[Attack] read %s' % args.success_file)

            found_shape = self.original_perturbations.shape
            assert found_shape[0] == self.original_success.shape[0]
            assert found_shape[1] == self.original_success.shape[1]
            assert found_shape[2] == self.test_images.shape[1]
            assert found_shape[3] == self.test_images.shape[2]

            found_attempts, found_samples = found_shape[0], found_shape[1]
            if found_samples >= args.max_samples and found_attempts >= args.max_attempts:
                log('[Attack] found %d attempts, %d samples, requested no more' % (found_attempts, found_samples))
                return

            if found_attempts == args.max_attempts:
                # All attempts exist already: only attack the samples not stored yet.
                self.test_images = self.test_images[found_samples:]
                self.test_codes = self.test_codes[found_samples:]
                args.max_samples -= found_samples
                concatenate_axis = 1
                log('[Attack] found %d attempts with %d perturbations, computing %d more perturbations' % (found_attempts, found_samples, args.max_samples))
            elif found_samples == args.max_samples:
                # All samples exist already: only run the missing attempts.
                args.max_attempts -= found_attempts
                concatenate_axis = 0
                log('[Attack] found %d attempts with %d perturbations, computing %d more attempts' % (found_attempts, found_samples, args.max_attempts))

        # can't squeeze here!
        if self.test_images.shape[3] > 1:
            image_dims = tuple(self.test_images.shape[1:4])
        else:
            image_dims = tuple(self.test_images.shape[1:3])
        self.perturbations = numpy.zeros((args.max_attempts, args.max_samples) + image_dims)
        # -1 marks entries for which no result has been recorded.
        self.success = numpy.full((args.max_attempts, args.max_samples), -1, dtype=int)

        batch_size = min(args.batch_size, args.max_samples) if args.attack.find('Batch') >= 0 else 1

        objective = self.objective_class()
        num_batches = int(math.ceil(args.max_samples/batch_size))

        for batch_index in range(num_batches):
            i_start = batch_index*batch_size
            if i_start == args.max_samples:
                break
            i_end = min(i_start + batch_size, args.max_samples)

            batch_images = common.torch.as_variable(self.test_images[i_start:i_end], args.use_gpu)
            batch_classes = common.torch.as_variable(numpy.array(self.test_codes[i_start:i_end]), args.use_gpu)
            # Move the channel axis from last to second position.
            batch_images = batch_images.permute(0, 3, 1, 2)

            for attempt in range(args.max_attempts):
                attack = self.setup_attack(batch_images, batch_classes)
                success, perturbations, _, _, _ = attack.run(objective)
                # x != x is only true for NaN entries.
                assert not numpy.any(perturbations != perturbations), perturbations

                # Note that we save the perturbed image, not only the perturbation!
                adversarial = perturbations + batch_images.cpu().numpy()
                adversarial = numpy.transpose(adversarial, (0, 2, 3, 1))
                self.perturbations[attempt][i_start:i_end] = numpy.squeeze(adversarial)
                self.success[attempt][i_start:i_end] = success

                # IMPORTANT: The adversarial examples are not considering whether the classifier is
                # actually correct to start with.

            log('[Attack] %d: completed' % batch_index)

        if concatenate_axis >= 0 and self.perturbations.shape[0] == args.max_attempts:
            self.perturbations = numpy.concatenate((self.original_perturbations, self.perturbations), axis=concatenate_axis)
            self.success = numpy.concatenate((self.original_success, self.success), axis=concatenate_axis)
            log('[Attack] concatenated')

        utils.write_hdf5(args.perturbations_file, self.perturbations)
        log('[Attack] wrote %s' % args.perturbations_file)
        utils.write_hdf5(args.success_file, self.success)
        log('[Attack] wrote %s' % args.success_file)
    def visualize_perturbations(self):
        """
        Visualize perturbations.

        For up to the first 1000 samples, and at most 6 attempts per sample,
        writes the original image, the stored attack and the normalized
        perturbation (attack - image) as PNG files into the output directory.
        For perturbations with more than two dimensions, the L2 magnitude
        along axis 2 is written as well. Samples are skipped if no entry of
        ``self.success[i]`` is non-zero or if ``self.accuracy[i]`` is falsy.
        """

        # self.perturbations is indexed as [sample][attempt] here.
        num_attempts = min(self.perturbations.shape[1], 6)
        utils.makedir(self.args.output_directory)

        count = 0
        for i in range(min(1000, self.perturbations.shape[0])):

            # NOTE(review): numpy.any treats every non-zero entry as success;
            # confirm this matches how failed attempts are encoded in self.success.
            if not numpy.any(self.success[i]) or not self.accuracy[i]:
                continue
            elif count > 200:
                break

            for j in range(num_attempts):
                image = self.test_images[i]
                attack = self.perturbations[i][j]
                perturbation = attack - image
                max_perturbation = numpy.max(numpy.abs(perturbation))
                # Scale to [-1, 1] for display; skip the division for an
                # all-zero perturbation, which would otherwise become NaN.
                if max_perturbation > 0:
                    perturbation /= max_perturbation

                image_representation = self.image_representations[i]
                attack_representation = self.perturbation_representations[i][j]

                # The predicted labels are encoded into the file names.
                image_label = numpy.argmax(image_representation)
                attack_label = numpy.argmax(attack_representation)

                image_file = os.path.join(
                    self.args.output_directory,
                    '%d_%d_image_%d.png' % (i, j, image_label))
                attack_file = os.path.join(
                    self.args.output_directory,
                    '%d_%d_attack_%d.png' % (i, j, attack_label))
                perturbation_file = os.path.join(
                    self.args.output_directory,
                    '%d_%d_perturbation_%g.png' % (i, j, max_perturbation))

                vis.image(image_file, image, scale=10)
                vis.image(attack_file, attack, scale=10)
                vis.perturbation(perturbation_file, perturbation, scale=10)

                if len(perturbation.shape) > 2:
                    # Collapse axis 2 into a per-pixel L2 magnitude.
                    perturbation_magnitude = numpy.linalg.norm(perturbation,
                                                               ord=2,
                                                               axis=2)
                    max_perturbation_magnitude = numpy.max(
                        numpy.abs(perturbation_magnitude))
                    # Same zero guard as above.
                    if max_perturbation_magnitude > 0:
                        perturbation_magnitude /= max_perturbation_magnitude

                    perturbation_file = os.path.join(
                        self.args.output_directory,
                        '%d_%d_perturbation_magnitude_%g.png' %
                        (i, j, max_perturbation_magnitude))
                    vis.perturbation(perturbation_file,
                                     perturbation_magnitude,
                                     scale=10)

            count += 1
    def attack(self):
        """
        Run the attack on randomly generated images with random labels.

        Instead of the test images, each batch consists of random integer
        images (with the per-image shape of ``self.test_images``) and random
        class labels. For every attempt, the perturbed images, the success
        indicators and the probabilities returned by the attack are stored and
        finally written to the perturbations, success and probabilities files.
        """

        assert self.model is not None
        assert self.model.training is False

        if self.args.attack.find('Batch') >= 0:
            batch_size = min(self.args.batch_size, self.args.max_samples)
        else:
            batch_size = 1

        objective = self.objective_class()
        num_batches = int(math.ceil(self.args.max_samples / batch_size))

        # Per-image shape; numpy shapes are tuples, so build the random size
        # below by tuple concatenation (list + tuple raises a TypeError).
        image_shape = tuple(self.test_images.shape[1:])

        # can't squeeze here!
        if self.test_images.shape[3] > 1:
            self.perturbations = numpy.zeros(
                (self.args.max_attempts, self.args.max_samples,
                 self.test_images.shape[1], self.test_images.shape[2],
                 self.test_images.shape[3]))
        else:
            self.perturbations = numpy.zeros(
                (self.args.max_attempts, self.args.max_samples,
                 self.test_images.shape[1], self.test_images.shape[2]))
        # -1 marks entries for which no result has been recorded.
        self.success = numpy.ones(
            (self.args.max_attempts, self.args.max_samples), dtype=int) * -1
        self.probabilities = numpy.zeros(
            (self.args.max_attempts, self.args.max_samples, self.N_class))

        for i in range(num_batches):
            i_start = i * batch_size
            i_end = min((i + 1) * batch_size, self.args.max_samples)

            # Draw exactly as many images as the target slice holds, so that a
            # shorter final batch still fits into [i_start:i_end].
            # NOTE(review): randint's upper bound is exclusive, so pixel values
            # are in [0, 254] and labels below in [0, N_class - 2]; confirm
            # that this is intended.
            batch_images = numpy.random.randint(
                0, 255, size=(i_end - i_start,) + image_shape)
            batch_images = common.torch.as_variable(batch_images,
                                                    self.args.use_gpu)
            # Move the channel axis from last to second position.
            batch_images = batch_images.permute(0, 3, 1, 2)

            batch_classes = common.torch.as_variable(
                numpy.random.randint(0,
                                     self.N_class - 1,
                                     size=(batch_images.size(0))),
                self.args.use_gpu)

            t = 0
            while t < self.args.max_attempts:
                attack = self.setup_attack(batch_images, batch_classes)
                success, perturbations, probabilities, norm, _ = attack.run(
                    objective)
                # x != x is only true for NaN entries.
                assert not numpy.any(
                    perturbations != perturbations), perturbations

                # Note that we save the perturbed image, not only the perturbation!
                self.perturbations[t][i_start:i_end] = numpy.squeeze(
                    numpy.transpose(perturbations + batch_images.cpu().numpy(),
                                    (0, 2, 3, 1)))
                self.success[t][i_start:i_end] = success
                self.probabilities[t][i_start:i_end] = probabilities
                # IMPORTANT: The adversarial examples are not considering whether the classifier is
                # actually correct to start with.

                t += 1

            log('[Attack] %d: completed' % i)

        utils.write_hdf5(self.args.perturbations_file, self.perturbations)
        log('[Attack] wrote %s' % self.args.perturbations_file)
        utils.write_hdf5(self.args.success_file, self.success)
        log('[Attack] wrote %s' % self.args.success_file)
        utils.write_hdf5(self.args.probabilities_file, self.probabilities)
        log('[Attack] wrote %s' % self.args.probabilities_file)
Пример #11
0
    def load_data(self):
        """
        Load data and model.

        Reads test/train images, test/train theta, test codes, accuracy,
        perturbations and success indicators from the HDF5 files given in
        ``self.args``, flattens perturbations and success to one row per
        (sample, attempt) pair, and loads one or several decoders into
        ``self.model``.
        """

        self.test_images = utils.read_hdf5(self.args.test_images_file).astype(numpy.float32)
        log('[Testing] read %s' % self.args.test_images_file)

        # For handling both color and gray images.
        if len(self.test_images.shape) < 4:
            self.test_images = numpy.expand_dims(self.test_images, axis=3)
            log('[Testing] no color images, adjusted size')
        self.resolution = self.test_images.shape[2]
        log('[Testing] resolution %d' % self.resolution)

        self.train_images = utils.read_hdf5(self.args.train_images_file).astype(numpy.float32)
        # Unlike the test images, the training images are kept flattened per sample.
        self.train_images = self.train_images.reshape((self.train_images.shape[0], -1))
        log('[Testing] read %s' % self.args.train_images_file)

        self.test_theta = utils.read_hdf5(self.args.test_theta_file).astype(numpy.float32)
        log('[Testing] read %s' % self.args.test_theta_file)

        self.train_theta = utils.read_hdf5(self.args.train_theta_file).astype(numpy.float32)
        log('[Testing] read %s' % self.args.train_theta_file)

        # numpy.int was only an alias of the builtin int and has been removed
        # from recent NumPy releases; the builtin gives the same dtype.
        self.test_codes = utils.read_hdf5(self.args.test_codes_file).astype(int)
        self.test_codes = self.test_codes[:, self.args.label_index]
        self.N_class = numpy.max(self.test_codes) + 1
        log('[Testing] read %s' % self.args.test_codes_file)

        self.accuracy = utils.read_hdf5(self.args.accuracy_file)
        log('[Testing] read %s' % self.args.accuracy_file)

        self.perturbations = utils.read_hdf5(self.args.perturbations_file).astype(numpy.float32)
        # Axis 0 of the stored perturbations enumerates attempts, axis 1 samples.
        self.N_attempts = self.perturbations.shape[0]
        # x != x is only true for NaN entries.
        assert not numpy.any(self.perturbations != self.perturbations), 'NaN in perturbations'

        # First, repeat relevant data.
        self.perturbation_theta = numpy.repeat(self.test_theta[:self.perturbations.shape[1]], self.N_attempts, axis=0)
        self.perturbation_codes = numpy.repeat(self.test_codes[:self.perturbations.shape[1]], self.N_attempts, axis=0)
        self.perturbation_codes = numpy.squeeze(self.perturbation_codes)
        self.accuracy = numpy.repeat(self.accuracy[:self.perturbations.shape[1]], self.N_attempts, axis=0)

        # Then, reshape the perturbations!
        # Swapping to (samples, attempts, ...) first makes the flattened rows
        # line up with the repeated theta/codes/accuracy above.
        self.perturbations = numpy.swapaxes(self.perturbations, 0, 1)
        self.perturbations = self.perturbations.reshape((self.perturbations.shape[0] * self.perturbations.shape[1], -1))
        log('[Testing] read %s' % self.args.perturbations_file)

        self.success = utils.read_hdf5(self.args.success_file)
        self.success = numpy.swapaxes(self.success, 0, 1)
        self.success = self.success.reshape((self.success.shape[0] * self.success.shape[1]))
        log('[Testing] read %s' % self.args.success_file)

        assert self.args.decoder_files
        decoder_files = self.args.decoder_files.split(',')
        for decoder_file in decoder_files:
            assert os.path.exists(decoder_file), 'could not find %s' % decoder_file

        log('[Testing] using %d input channels' % self.test_images.shape[3])
        decoder_units = list(map(int, self.args.decoder_units.split(',')))
        # Channels-first resolution passed to the decoder constructors.
        resolution = (self.test_images.shape[3], self.test_images.shape[1], self.test_images.shape[2])

        def load_decoder(decoder_file):
            # Build a decoder, restore its weights, optionally move it to the
            # GPU and switch it to evaluation mode.
            decoder = models.LearnedDecoder(self.args.latent_space_size, resolution=resolution,
                                            architecture=self.args.decoder_architecture,
                                            start_channels=self.args.decoder_channels,
                                            activation=self.args.decoder_activation,
                                            batch_normalization=not self.args.decoder_no_batch_normalization,
                                            units=decoder_units)

            state = State.load(decoder_file)
            decoder.load_state_dict(state.model)
            if self.args.use_gpu and not cuda.is_cuda(decoder):
                decoder = decoder.cuda()
            decoder.eval()
            return decoder

        if len(decoder_files) > 1:
            log('[Testing] loading multiple decoders')
            decoders = []
            for decoder_file in decoder_files:
                decoders.append(load_decoder(decoder_file))
                log('[Testing] loaded %s' % decoder_file)
            self.model = models.SelectiveDecoder(decoders, resolution=resolution)
        else:
            log('[Testing] loading one decoder')
            self.model = load_decoder(decoder_files[0])
            log('[Testing] read decoder')
Пример #12
0
    def attack(self):
        """
        Run the attack on theta (the decoder input) and store the perturbed theta.

        If both the perturbations file and the success file already exist, they
        are read first: either nothing is left to compute, or only the missing
        samples/attempts are computed and concatenated to the stored results
        before everything is written back.
        """

        assert self.model is not None
        assert self.model.classifier.training is False

        # -1 means "nothing to concatenate"; 0 appends attempts, 1 appends samples.
        concatenate_axis = -1
        if os.path.exists(self.args.perturbations_file) and os.path.exists(
                self.args.success_file):
            self.original_perturbations = utils.read_hdf5(
                self.args.perturbations_file)
            assert len(self.original_perturbations.shape) == 3
            log('[Attack] read %s' % self.args.perturbations_file)

            self.original_success = utils.read_hdf5(self.args.success_file)
            log('[Attack] read %s' % self.args.success_file)

            found_attempts = self.original_perturbations.shape[0]
            found_samples = self.original_perturbations.shape[1]
            assert found_attempts == self.original_success.shape[0]
            assert found_samples == self.original_success.shape[1]

            # Nothing to do only if at least as many samples AND attempts as
            # requested are stored. (Comparing with <= skipped the attack when
            # fewer results were stored and produced negative counts below
            # when more were stored.)
            if found_samples >= self.args.max_samples and found_attempts >= self.args.max_attempts:
                log('[Attack] found %d attempts, %d samples, requested no more'
                    % (found_attempts, found_samples))
                return
            elif found_attempts == self.args.max_attempts:
                # All attempts exist already: only attack the samples not stored yet.
                self.test_images = self.test_images[found_samples:]
                self.test_codes = self.test_codes[found_samples:]
                self.args.max_samples = self.args.max_samples - found_samples
                concatenate_axis = 1
                log('[Attack] found %d attempts with %d perturbations, computing %d more perturbations'
                    % (found_attempts, found_samples, self.args.max_samples))
            elif found_samples == self.args.max_samples:
                # All samples exist already: only run the missing attempts.
                self.args.max_attempts = self.args.max_attempts - found_attempts
                concatenate_axis = 0
                log('[Attack] found %d attempts with %d perturbations, computing %d more attempts'
                    % (found_attempts, found_samples, self.args.max_attempts))

        self.perturbations = numpy.zeros(
            (self.args.max_attempts, self.args.max_samples, self.args.N_theta))
        # -1 marks entries for which no result has been recorded.
        self.success = numpy.ones(
            (self.args.max_attempts, self.args.max_samples), dtype=int) * -1

        if self.args.attack.find('Batch') >= 0:
            batch_size = min(self.args.batch_size, self.args.max_samples)
        else:
            batch_size = 1

        objective = self.objective_class()
        num_batches = int(math.ceil(self.args.max_samples / batch_size))

        for i in range(num_batches):
            i_start = i * batch_size
            i_end = min((i + 1) * batch_size, self.args.max_samples)

            batch_classes = common.torch.as_variable(
                self.test_codes[i_start:i_end], self.args.use_gpu)
            # The attack starts from an all-zero theta, except for component 4
            # (if present), which starts at one.
            batch_theta = common.torch.as_variable(
                numpy.zeros((i_end - i_start, self.args.N_theta),
                            dtype=numpy.float32), self.args.use_gpu)
            if self.args.N_theta > 4:
                batch_theta[:, 4] = 1
            batch_images = common.torch.as_variable(
                self.test_images[i_start:i_end], self.args.use_gpu)
            # Move the channel axis from last to second position.
            batch_images = batch_images.permute(0, 3, 1, 2)

            # Hand the current batch of images to the decoder.
            self.model.decoder.set_image(batch_images)

            t = 0
            while t < self.args.max_attempts:
                attack = self.setup_attack(batch_theta, batch_classes)
                success, perturbations, probabilities, norm, _ = attack.run(
                    objective)
                # x != x is only true for NaN entries.
                assert not numpy.any(
                    perturbations != perturbations), perturbations

                # hack for when only one dimensional latent space is used!
                perturbations = perturbations.reshape(batch_theta.size())
                # Note that we save the perturbed theta, not only the perturbation!
                self.perturbations[t][
                    i_start:i_end] = perturbations + batch_theta.cpu().detach(
                    ).numpy()
                self.success[t][i_start:i_end] = success
                t += 1

            log('[Attack] %d: completed' % i)

        if concatenate_axis >= 0:
            if self.perturbations.shape[0] == self.args.max_attempts:
                self.perturbations = numpy.concatenate(
                    (self.original_perturbations, self.perturbations),
                    axis=concatenate_axis)
                self.success = numpy.concatenate(
                    (self.original_success, self.success),
                    axis=concatenate_axis)
                log('[Attack] concatenated')

        utils.write_hdf5(self.args.perturbations_file, self.perturbations)
        log('[Attack] wrote %s' % self.args.perturbations_file)
        utils.write_hdf5(self.args.success_file, self.success)
        log('[Attack] wrote %s' % self.args.success_file)