def main(self):
        """
        Decode all codes and transformations into images and store them.

        Reads the font database, the codes and the transformation parameters
        (theta) from the HDF5 files configured in ``self.args``, runs them
        through a ``models.OneHotDecoder`` in batches of
        ``self.args.batch_size`` and writes the decoded images to
        ``self.args.images_file``.
        """

        database = utils.read_hdf5(self.args.database_file)
        log('[Data] read %s' % self.args.database_file)

        N_font = database.shape[0]
        N_class = database.shape[1]

        assert database.shape[2] == database.shape[3]
        # Merge the two leading axes so that every font/class pair is one entry.
        database = database.reshape((N_font * N_class, database.shape[2],
                                     database.shape[3]))
        database = torch.from_numpy(database).float()
        if self.args.use_gpu:
            database = database.cuda()

        database = torch.autograd.Variable(database)

        codes = utils.read_hdf5(self.args.codes_file)
        codes = codes[:, 0]
        codes = common.numpy.one_hot(codes, N_font * N_class)
        log('[Data] read %s' % self.args.codes_file)

        theta = utils.read_hdf5(self.args.theta_file)
        N = theta.shape[0]
        N_theta = theta.shape[1]
        log('[Data] read %s' % self.args.theta_file)

        model = models.OneHotDecoder(database, N_theta)
        images = []

        batch_size = self.args.batch_size
        num_batches = int(math.ceil(float(N) / batch_size))
        for b in range(num_batches):
            b_start = b * batch_size
            b_end = min((b + 1) * batch_size, N)

            batch_theta = torch.from_numpy(theta[b_start:b_end]).float()
            batch_codes = torch.from_numpy(codes[b_start:b_end]).float()

            if self.args.use_gpu:
                batch_codes = batch_codes.cuda()
                batch_theta = batch_theta.cuda()

            batch_codes = torch.autograd.Variable(batch_codes)
            batch_theta = torch.autograd.Variable(batch_theta)
            output = model(batch_codes, batch_theta)

            # Drop singleton axes (e.g. a single channel) but always keep the
            # batch axis: a plain squeeze() would also remove it for a batch
            # holding one sample and break the concatenation below.
            batch_images = output.data.cpu().numpy()
            kept = tuple(d for d in batch_images.shape[1:] if d != 1)
            images.append(batch_images.reshape((batch_images.shape[0],) + kept))

            if b % 1000 == 0:
                log('[Data] processed %d/%d batches' % (b + 1, num_batches))

        images = numpy.concatenate(images, axis=0)
        if len(images.shape) > 3:
            # A remaining fourth axis is moved from position 1 to the end.
            images = numpy.transpose(images, (0, 2, 3, 1))
        utils.write_hdf5(self.args.images_file, images)
        log('[Data] wrote %s' % self.args.images_file)
Exemplo n.º 2
0
    def test(self):
        """
        Decode the perturbations into images and write them to disk.

        Runs ``self.perturbations`` through ``self.model`` batch by batch, with
        the font/class code of the batch fixed through ``set_code``, and
        accumulates the outputs in ``self.perturbation_images``. The result is
        reshaped to ``(N_samples, N_attempts, ...)``, its first two axes are
        swapped and it is written to ``self.args.perturbation_images_file``.
        """

        num_batches = int(
            math.ceil(self.perturbations.shape[0] / self.args.batch_size))

        for b in range(num_batches):
            b_start = b * self.args.batch_size
            b_end = min((b + 1) * self.args.batch_size,
                        self.perturbations.shape[0])
            batch_fonts = self.test_fonts[b_start:b_end]
            batch_classes = self.test_classes[b_start:b_end]
            # The code is the concatenation of the one-hot font and class.
            batch_code = numpy.concatenate(
                (common.numpy.one_hot(batch_fonts, self.N_font),
                 common.numpy.one_hot(batch_classes, self.N_class)),
                axis=1).astype(numpy.float32)

            batch_inputs = common.torch.as_variable(
                self.perturbations[b_start:b_end], self.args.use_gpu)
            batch_code = common.torch.as_variable(batch_code,
                                                  self.args.use_gpu)

            # This basically allows to only optimize over theta, keeping the font/class code fixed.
            self.model.set_code(batch_code)
            output_images = self.model(batch_inputs)

            output_images = numpy.transpose(
                output_images.cpu().detach().numpy(), (0, 2, 3, 1))
            # Drop singleton axes but never the batch axis; a plain squeeze()
            # would also remove it for a batch with a single sample and break
            # the concatenation with the previously collected images.
            kept = tuple(d for d in output_images.shape[1:] if d != 1)
            output_images = output_images.reshape(
                (output_images.shape[0],) + kept)
            self.perturbation_images = common.numpy.concatenate(
                self.perturbation_images, output_images)

            if b % 100 == 0:
                log('[Testing] computing perturbation images %d' % b)

        utils.makedir(os.path.dirname(self.args.perturbation_images_file))
        # Split the flat sample axis into samples x attempts, keeping whatever
        # image axes are present, then put attempts first.
        self.perturbation_images = self.perturbation_images.reshape(
            (self.N_samples, self.N_attempts)
            + tuple(self.perturbation_images.shape[1:]))
        self.perturbation_images = numpy.swapaxes(self.perturbation_images, 0,
                                                  1)
        utils.write_hdf5(self.args.perturbation_images_file,
                         self.perturbation_images)
        log('[Testing] wrote %s' % self.args.perturbation_images_file)
Exemplo n.º 3
0
    def plot_manifolds(self):
        """
        Plot test images and adversarial examples in a shared t-SNE embedding.

        The embedding is fitted on the first ``test_codes.shape[0] // 25``
        flattened test images. One plot is written per class (test images plus
        the adversarial examples of that class) and one plot containing the
        adversarial examples of all classes; files are placed in
        ``self.args.plot_directory``.
        """

        #
        # Plot all classes and adversarial examples in image space for individual classes as well as all classes.
        #

        fit = self.test_codes.shape[0]//25
        test_images = self.test_images.reshape((self.test_images.shape[0], -1))
        manifold_visualization = plot.ManifoldVisualization('tsne', pre_pca=40)
        manifold_visualization.fit(test_images[:fit])
        log('[Testing] computed t-SNE on test images')

        class_labels = ['Class %d' % (n + 1) for n in range(self.N_class)]
        note = '(The adversarial examples are projected into the embedding using learned SVRs)'

        for n in range(self.N_class):
            adversarial_label = 'Adversarial Examples Class %d' % (n + 1)
            labels = class_labels + [adversarial_label]
            # Select the adversarial examples of class n once and reuse them.
            adversarial_images = self.perturbation_images[self.perturbation_codes == n]
            data = numpy.concatenate((
                test_images[:fit],
                adversarial_images
            ))
            # Adversarial examples get the first id after the regular classes
            # (presumably ids index into labels -- verify against
            # plot.ManifoldVisualization); this used to be hard-coded as 10.
            classes = numpy.concatenate((
                self.test_codes[:fit],
                numpy.ones((adversarial_images.shape[0])) * self.N_class,
            ))
            plot_file = os.path.join(self.args.plot_directory, 'perturbations_%d' % (n + 1))
            # The title uses the same 1-based class number as label and file name.
            manifold_visualization.visualize(plot_file, data, classes, labels, title='%s\n%s' % (adversarial_label, note))
            log('[Testing] wrote %s' % plot_file)

        labels = class_labels + ['Adversarial Examples Class %d' % (n + 1) for n in range(self.N_class)]
        data = numpy.concatenate((
            test_images[:fit],
            self.perturbation_images
        ))
        # Shift the adversarial class ids behind the ids of the regular classes.
        classes = numpy.concatenate((
            self.test_codes[:fit],
            self.perturbation_codes + self.N_class,
        ))
        plot_file = os.path.join(self.args.plot_directory, 'perturbations')
        manifold_visualization.visualize(plot_file, data, classes, labels, title='Adversarial Examples\n%s' % note)
        log('[Testing] wrote %s' % plot_file)
    def compute_local_pca(self):
        """
        Measure how well a local PCA around each sample reconstructs it.

        For every successfully attacked sample an incremental PCA with
        ``self.args.n_pca`` components is fitted on the nearest neighbors of the
        perturbation (taken from the first ``self.args.n_fit`` nearest neighbor
        images) plus the test image itself. The test image, the perturbation
        and their difference are reconstructed through this PCA; the mean
        squared reconstruction error and the angle (in degrees) are stored in
        ``self.distances`` / ``self.angles`` under the keys ``'test'``,
        ``'perturbation'`` and ``'true'``, restricted to the successful samples.
        """

        success = numpy.logical_and(self.success >= 0, self.accuracy)
        log('[Detection] %d valid attacked samples' % numpy.sum(success))

        nearest_neighbor_images = self.nearest_neighbor_images.reshape(self.nearest_neighbor_images.shape[0], -1)
        nearest_neighbor_images = nearest_neighbor_images[:self.args.n_fit]

        perturbations = self.perturbations.reshape(self.perturbations.shape[0], -1)
        test_images = self.test_images.reshape(self.test_images.shape[0], -1)
        pure_perturbations = perturbations - test_images

        nearest_neighbors_indices = self.compute_nearest_neighbors(perturbations)

        keys = ('true', 'test', 'perturbation')
        for key in keys:
            self.distances[key] = numpy.zeros((success.shape[0]))
        for key in keys:
            self.angles[key] = numpy.zeros((success.shape[0]))

        for n in range(pure_perturbations.shape[0]):
            if not success[n]:
                continue

            nearest_neighbors = nearest_neighbor_images[nearest_neighbors_indices[n, :]]
            nearest_neighbors = numpy.concatenate((nearest_neighbors, test_images[n].reshape(1, -1)), axis=0)

            pca = sklearn.decomposition.IncrementalPCA(n_components=self.args.n_pca)
            pca.fit(nearest_neighbors)

            vectors = {
                'test': test_images[n],
                'perturbation': perturbations[n],
                'true': pure_perturbations[n],
            }
            for key, vector in vectors.items():
                reconstruction = pca.inverse_transform(pca.transform(vector.reshape(1, -1)))
                residual = reconstruction - vector
                # Mean squared error as a scalar (instead of assigning a
                # one-element array to a single slot).
                self.distances[key][n] = numpy.mean(numpy.square(residual))
                self.angles[key][n] = numpy.rad2deg(common.numpy.angles(reconstruction.T, vector.T))

            log('[Detection] %d: true distance=%g angle=%g' % (n, self.distances['true'][n], self.angles['true'][n]))
            log('[Detection] %d: perturbation distance=%g angle=%g' % (n, self.distances['perturbation'][n], self.angles['perturbation'][n]))
            log('[Detection] %d: test distance=%g angle=%g' % (n, self.distances['test'][n], self.angles['test'][n]))

        # Keep only the successfully attacked samples. The angles are filtered
        # as well so that they stay aligned with the distances instead of
        # carrying zero entries for the skipped samples.
        for key in ('test', 'perturbation', 'true'):
            self.distances[key] = self.distances[key][success]
            self.angles[key] = self.angles[key][success]
Exemplo n.º 5
0
    def compute_images(self):
        """
        Compute images of the original and the perturbed latent codes.

        Passes ``self.perturbation_theta`` and ``self.perturbations`` through
        ``self.model`` together with the one-hot font/class code taken from
        columns 1 and 2 of ``self.perturbation_codes``. The outputs are
        accumulated in ``self.theta_images`` and ``self.perturbation_images``
        and finally flattened to one row per sample.
        """

        assert self.model.training is False

        def as_images(tensor):
            """Convert a model output to a channels-last array without singleton image axes."""
            array = numpy.transpose(tensor.cpu().detach().numpy(), (0, 2, 3, 1))
            # Keep the batch axis even for a batch with a single sample; a
            # plain squeeze() would drop it and break the concatenation.
            kept = tuple(d for d in array.shape[1:] if d != 1)
            return array.reshape((array.shape[0],) + kept)

        num_batches = int(
            math.ceil(self.perturbations.shape[0] / self.args.batch_size))
        for b in range(num_batches):
            b_start = b * self.args.batch_size
            b_end = min((b + 1) * self.args.batch_size,
                        self.perturbations.shape[0])

            batch_fonts = self.perturbation_codes[b_start:b_end, 1]
            batch_classes = self.perturbation_codes[b_start:b_end, 2]
            batch_code = numpy.concatenate(
                (common.numpy.one_hot(batch_fonts, self.N_font),
                 common.numpy.one_hot(batch_classes, self.N_class)),
                axis=1).astype(numpy.float32)

            batch_code = common.torch.as_variable(batch_code,
                                                  self.args.use_gpu)
            batch_theta = common.torch.as_variable(
                self.perturbation_theta[b_start:b_end], self.args.use_gpu)
            theta_images = self.model(batch_code, batch_theta)

            batch_perturbation = common.torch.as_variable(
                self.perturbations[b_start:b_end], self.args.use_gpu)
            perturbation_images = self.model(batch_code, batch_perturbation)

            # Report progress every 100 batches (the condition used to be
            # inverted and logged on all other batches instead).
            if b % 100 == 0:
                log('[Testing] %d' % b)

            self.theta_images = common.numpy.concatenate(
                self.theta_images, as_images(theta_images))
            self.perturbation_images = common.numpy.concatenate(
                self.perturbation_images, as_images(perturbation_images))

        self.theta_images = self.theta_images.reshape(
            (self.theta_images.shape[0], -1))
        self.perturbation_images = self.perturbation_images.reshape(
            (self.perturbation_images.shape[0], -1))
    def compute_images(self):
        """
        Replace the latent perturbations by the images they produce.

        Passes ``self.perturbations`` through ``self.model`` together with the
        one-hot font/class code taken from columns 1 and 2 of
        ``self.test_codes``, accumulates the outputs in
        ``self.perturbation_images`` and then lets ``self.perturbations`` refer
        to these images.
        """

        assert self.test_codes is not None

        num_batches = int(
            math.ceil(self.perturbations.shape[0] / self.args.batch_size))
        for b in range(num_batches):
            b_start = b * self.args.batch_size
            b_end = min((b + 1) * self.args.batch_size,
                        self.perturbations.shape[0])

            batch_fonts = self.test_codes[b_start:b_end, 1]
            batch_classes = self.test_codes[b_start:b_end, 2]
            batch_code = numpy.concatenate(
                (common.numpy.one_hot(batch_fonts, self.N_font),
                 common.numpy.one_hot(batch_classes, self.N_class)),
                axis=1).astype(numpy.float32)

            batch_code = common.torch.as_variable(batch_code,
                                                  self.args.use_gpu)
            batch_perturbation = common.torch.as_variable(
                self.perturbations[b_start:b_end].astype(numpy.float32),
                self.args.use_gpu)
            perturbation_images = self.model(batch_code, batch_perturbation)

            # Report progress every 100 batches (the condition used to be
            # inverted and logged on all other batches instead).
            if b % 100 == 0:
                log('[Testing] %d' % b)

            perturbation_images = perturbation_images.cpu().detach().numpy()
            # Drop singleton axes but never the batch axis; a plain squeeze()
            # would also remove it for a batch with a single sample and break
            # the concatenation with the previously collected images.
            kept = tuple(d for d in perturbation_images.shape[1:] if d != 1)
            perturbation_images = perturbation_images.reshape(
                (perturbation_images.shape[0],) + kept)
            self.perturbation_images = common.numpy.concatenate(
                self.perturbation_images, perturbation_images)

        # Trick to perform analysis on actual images of adversarial examples.
        self.perturbations = self.perturbation_images
    def test(self):
        """
        Test classifier to identify valid samples to attack.

        Classifies ``self.test_theta`` batch by batch with the font/class code
        fixed in the decoder, stores a boolean per sample in ``self.accuracy``
        (``True`` where the predicted class equals the true class), writes it
        to ``self.args.accuracy_file`` and logs the accuracy on all samples and
        on the first ``self.args.max_samples`` samples.
        """

        num_batches = int(math.ceil(self.test_theta.shape[0] / self.args.batch_size))

        for b in range(num_batches):
            b_start = b * self.args.batch_size
            b_end = min((b + 1) * self.args.batch_size, self.test_theta.shape[0])
            batch_fonts = self.test_fonts[b_start: b_end]
            batch_classes = self.test_classes[b_start: b_end]
            batch_code = numpy.concatenate((common.numpy.one_hot(batch_fonts, self.N_font), common.numpy.one_hot(batch_classes, self.N_class)), axis=1).astype(numpy.float32)

            batch_classes = common.torch.as_variable(batch_classes, self.args.use_gpu)
            batch_inputs = common.torch.as_variable(self.test_theta[b_start: b_end], self.args.use_gpu)
            batch_code = common.torch.as_variable(batch_code, self.args.use_gpu)

            # This basically allows to only optimize over theta, keeping the font/class code fixed.
            self.model.decoder.set_code(batch_code)

            output_classes = self.model(batch_inputs)
            # Only the predicted class index is needed, not its probability.
            _, indices = torch.max(torch.nn.functional.softmax(output_classes, dim=1), dim=1)
            # Zero exactly where prediction and label agree.
            errors = torch.abs(indices - batch_classes)

            self.accuracy = common.numpy.concatenate(self.accuracy, errors.data.cpu().numpy())

            if b % 100 == 0:
                log('[Attack] computing accuracy %d' % b)

        self.accuracy = self.accuracy == 0
        utils.write_hdf5(self.args.accuracy_file, self.accuracy)
        log('[Attack] wrote %s' % self.args.accuracy_file)

        accuracy = numpy.sum(self.accuracy) / float(self.accuracy.shape[0])
        log('[Attack] accuracy %g' % accuracy)

        # Normalize by the number of samples actually available: dividing by
        # max_samples understates the accuracy when fewer samples exist.
        num_samples = min(self.args.max_samples, self.accuracy.shape[0])
        accuracy = numpy.sum(self.accuracy[:num_samples]) / float(num_samples)
        log('[Attack] accuracy on %d samples %g' % (num_samples, accuracy))
    def train(self, epoch):
        """
        Run one training epoch of the auto encoder.

        Every batch is drawn from a random permutation of the training images,
        the weighted reconstruction loss plus the latent loss is minimized with
        the scheduler's optimizer, and one row of statistics per batch is
        appended to ``self.train_statistics``.

        :param epoch: current epoch
        :type epoch: int
        """

        assert self.encoder is not None and self.decoder is not None
        assert self.scheduler is not None

        # Put all three modules into training mode, logging each switch.
        modules = (
            (self.auto_encoder, '[Training] %d set auto encoder to train'),
            (self.encoder, '[Training] %d set encoder to train'),
            (self.decoder, '[Training] %d set decoder to train'),
        )
        for module, message in modules:
            module.train()
            log(message % epoch)

        batch_size = self.args.batch_size
        num_samples = self.train_images.shape[0]
        num_batches = int(math.ceil(num_samples/batch_size))
        assert self.encoder.training is True

        # Append the first batch_size indices again so that the last batch is
        # always a full one.
        order = numpy.random.permutation(num_samples)
        order = numpy.concatenate((order, order[:batch_size]), axis=0)

        skip = 10
        for b in range(num_batches):
            self.scheduler.update(epoch, float(b)/num_batches)

            indices = order[b * batch_size: (b + 1) * batch_size]
            batch_images = common.torch.as_variable(self.train_images[indices], self.args.use_gpu)
            batch_images = batch_images.permute(0, 3, 1, 2)

            output_images, output_mu, output_logvar = self.auto_encoder(batch_images)
            reconstruction_loss = self.reconstruction_loss(batch_images, output_images)

            self.scheduler.optimizer.zero_grad()
            latent_loss = self.latent_loss(output_mu, output_logvar)
            loss = self.args.beta*reconstruction_loss + latent_loss
            loss.backward()
            self.scheduler.optimizer.step()

            reconstruction_error = self.reconstruction_error(batch_images, output_images)

            iteration = epoch*num_batches + b + 1
            capped_iteration = min(num_batches, iteration)
            row = numpy.array([
                iteration,
                iteration * batch_size,
                capped_iteration,
                capped_iteration * batch_size,
                reconstruction_loss.item(),
                reconstruction_error.item(),
                latent_loss.item(),
                torch.mean(output_mu).item(),
                torch.var(output_mu).item(),
                torch.mean(output_logvar).item(),
            ])
            self.train_statistics = numpy.vstack((self.train_statistics, row))

            if b%skip != skip//2:
                continue

            # Average statistics columns 4..9 over the rows selected by the
            # last `skip` iteration numbers.
            recent = self.train_statistics[max(0, iteration-skip):iteration]
            averages = tuple(numpy.mean(recent[:, column]) for column in range(4, 10))
            log('[Training] %d | %d: %g (%g) %g %g %g %g' % ((epoch, b) + averages))
    def compute_nn(self, inclusive=False):
        """
        Compute distances and angles to the span of the nearest neighbors.

        For every successfully attacked sample the pure perturbation, the
        perturbation and the test image (the latter two relative to a reference
        point) are projected with ``common.numpy.project_orthogonal`` onto the
        nearest neighbors relative to the same reference point. The L2 distance
        and the angle (in degrees) between each vector and its projection are
        stored in ``self.distances`` / ``self.angles`` under ``'true'``,
        ``'perturbation'`` and ``'test'``, restricted to successful samples.

        :param inclusive: if set, the test image is added to the neighbors and
            used as reference point instead of the neighbors' mean; the
            ``'test'`` results are then set to zero
        :type inclusive: bool
        """

        success = numpy.logical_and(self.success >= 0, self.accuracy)
        log('[Detection] %d valid attacked samples' % numpy.sum(success))

        # Flatten everything to one row per sample.
        neighbor_images = self.nearest_neighbor_images.reshape(self.nearest_neighbor_images.shape[0], -1)
        perturbations = self.perturbations.reshape(self.perturbations.shape[0], -1)
        test_images = self.test_images.reshape(self.test_images.shape[0], -1)

        neighbor_indices = self.compute_nearest_neighbors(perturbations)
        pure_perturbations = perturbations - test_images
        log('[Detection] computed nearest neighbors for perturbations')

        num_entries = success.shape[0]
        for key in ('true', 'test', 'perturbation'):
            self.distances[key] = numpy.zeros((num_entries))
        for key in ('true', 'test', 'perturbation'):
            self.angles[key] = numpy.zeros((num_entries))

        for n in range(pure_perturbations.shape[0]):
            if not success[n]:
                continue

            neighbors = neighbor_images[neighbor_indices[n, :]]

            if inclusive:
                neighbors = numpy.concatenate((neighbors, test_images[n].reshape(1, -1)), axis=0)
                reference = test_images[n]
            else:
                reference = numpy.average(neighbors, axis=0)

            basis = neighbors - reference
            relative_perturbation = perturbations[n] - reference
            relative_test_image = test_images[n] - reference

            if inclusive:
                # The test image was appended last, so it is the last basis row.
                assert numpy.allclose(relative_test_image, basis[-1])

            # One column per vector: pure perturbation, perturbation, test image.
            vectors = numpy.stack((
                pure_perturbations[n],
                relative_perturbation,
                relative_test_image
            ), axis=1)

            projections = common.numpy.project_orthogonal(basis.T, vectors)
            assert vectors.shape[0] == projections.shape[0]
            assert vectors.shape[1] == projections.shape[1]

            angles = numpy.rad2deg(common.numpy.angles(vectors, projections))
            distances = numpy.linalg.norm(vectors - projections, ord=2, axis=0)

            assert distances.shape[0] == 3
            assert angles.shape[0] == 3

            for column, key in enumerate(('true', 'perturbation', 'test')):
                self.distances[key][n] = distances[column]
                self.angles[key][n] = angles[column]

            log('[Detection] %d: true distance=%g angle=%g' % (n, self.distances['true'][n], self.angles['true'][n]))
            log('[Detection] %d: perturbation distance=%g angle=%g' % (n, self.distances['perturbation'][n], self.angles['perturbation'][n]))
            log('[Detection] %d: test distance=%g angle=%g' % (n, self.distances['test'][n], self.angles['test'][n]))

        # Keep only the entries of successfully attacked samples.
        for key in ('true', 'test', 'perturbation'):
            self.distances[key] = self.distances[key][success]
        for key in ('true', 'test', 'perturbation'):
            self.angles[key] = self.angles[key][success]

        if inclusive:
            self.distances['test'][:] = 0
            self.angles['test'][:] = 0
    def attack(self):
        """
        Run the attack on the test samples and store perturbations and success.

        If perturbation and success files already exist they are loaded first:
        when they already hold at least the requested number of attempts and
        samples nothing is computed; when they match the requested attempts
        (or samples) only the missing samples (or attempts) are computed and
        concatenated to the stored results. Otherwise everything is computed
        from scratch. The results are written to
        ``self.args.perturbations_file`` and ``self.args.success_file``.

        Note that ``self.args.max_samples`` / ``self.args.max_attempts`` and
        the test data attributes are modified in place when existing results
        are extended.
        """

        assert self.model is not None
        assert self.model.classifier.training is False

        concatenate_axis = -1
        if os.path.exists(self.args.perturbations_file) and os.path.exists(self.args.success_file):
            self.original_perturbations = utils.read_hdf5(self.args.perturbations_file)
            assert len(self.original_perturbations.shape) == 3, self.original_perturbations.shape
            log('[Attack] read %s' % self.args.perturbations_file)

            self.original_success = utils.read_hdf5(self.args.success_file)
            log('[Attack] read %s' % self.args.success_file)

            assert self.original_perturbations.shape[0] == self.original_success.shape[0]
            assert self.original_perturbations.shape[1] == self.original_success.shape[1]
            assert self.original_perturbations.shape[2] == self.test_theta.shape[1]

            found_attempts = self.original_perturbations.shape[0]
            found_samples = self.original_perturbations.shape[1]

            # Nothing to do only if the stored results cover at least what was
            # requested. (This used to test "<=", which returned early exactly
            # when results were missing and made the branches below
            # unreachable for their intended case.)
            if found_samples >= self.args.max_samples and found_attempts >= self.args.max_attempts:
                log('[Attack] found %d attempts, %d samples, requested no more' % (found_attempts, found_samples))
                return
            elif found_attempts == self.args.max_attempts or found_samples == self.args.max_samples:
                if found_attempts == self.args.max_attempts:
                    # Same number of attempts: attack only the samples that
                    # are not covered yet and append them along the sample axis.
                    self.test_theta = self.test_theta[found_samples:]
                    self.test_fonts = self.test_fonts[found_samples:]
                    self.test_classes = self.test_classes[found_samples:]
                    self.args.max_samples = self.args.max_samples - found_samples
                    concatenate_axis = 1
                    log('[Attack] found %d attempts with %d perturbations, computing %d more perturbations' % (
                    found_attempts, found_samples, self.args.max_samples))
                elif found_samples == self.args.max_samples:
                    # Same number of samples: run only the missing attempts
                    # and append them along the attempt axis.
                    self.args.max_attempts = self.args.max_attempts - found_attempts
                    concatenate_axis = 0
                    log('[Attack] found %d attempts with %d perturbations, computing %d more attempts' % (
                    found_attempts, found_samples, self.args.max_attempts))

        self.perturbations = numpy.zeros((self.args.max_attempts, self.args.max_samples, self.test_theta.shape[1]))
        # -1 marks entries for which no success value has been stored yet.
        self.success = numpy.ones((self.args.max_attempts, self.args.max_samples), dtype=int) * -1

        # Only attacks with 'Batch' in their name process several samples at once.
        if self.args.attack.find('Batch') >= 0:
            batch_size = min(self.args.batch_size, self.args.max_samples)
        else:
            batch_size = 1

        objective = self.objective_class()
        num_batches = int(math.ceil(self.args.max_samples/batch_size))

        for i in range(num_batches):
            if i*batch_size == self.args.max_samples:
                break

            i_start = i * batch_size
            i_end = min((i + 1) * batch_size, self.args.max_samples)

            batch_fonts = self.test_fonts[i_start: i_end]
            batch_classes = self.test_classes[i_start: i_end]
            batch_code = numpy.concatenate((common.numpy.one_hot(batch_fonts, self.N_font), common.numpy.one_hot(batch_classes, self.N_class)), axis=1).astype(numpy.float32)

            batch_classes = common.torch.as_variable(batch_classes, self.args.use_gpu)
            batch_inputs = common.torch.as_variable(self.test_theta[i_start: i_end], self.args.use_gpu)
            batch_code = common.torch.as_variable(batch_code, self.args.use_gpu)

            # This basically allows to only optimize over theta, keeping the font/class code fixed.
            self.model.decoder.set_code(batch_code)

            for t in range(self.args.max_attempts):
                attack = self.setup_attack(batch_inputs, batch_classes)
                success, perturbations, probabilities, norm, _ = attack.run(objective)
                # NaN is the only value that is not equal to itself.
                assert not numpy.any(perturbations != perturbations), perturbations

                # Note that we save the perturbed image, not only the perturbation!
                perturbations = perturbations.reshape(batch_inputs.size())  # hack for when only one dimensional latent space is used!
                self.perturbations[t][i_start: i_end] = perturbations + batch_inputs.cpu().numpy()
                self.success[t][i_start: i_end] = success

            log('[Attack] %d: completed' % i)

        if concatenate_axis >= 0:
            if self.perturbations.shape[0] == self.args.max_attempts:
                self.perturbations = numpy.concatenate((self.original_perturbations, self.perturbations), axis=concatenate_axis)
                self.success = numpy.concatenate((self.original_success, self.success), axis=concatenate_axis)
                log('[Attack] concatenated')

        utils.write_hdf5(self.args.perturbations_file, self.perturbations)
        log('[Attack] wrote %s' % self.args.perturbations_file)
        utils.write_hdf5(self.args.success_file, self.success)
        log('[Attack] wrote %s' % self.args.success_file)
    def compute_true(self):
        """
        Compare the perturbations with images produced by the model.

        For every batch the model's code is fixed to the font/class given by
        columns 1 and 2 of ``self.test_codes`` and ``self.test_theta`` is passed
        through the model; the outputs are collected in
        ``self.projected_perturbations``. The L2 distance and the angle (in
        degrees) between the flattened ``self.perturbations`` and these outputs
        are stored under ``'true'`` for the successfully attacked samples, and
        the ``'test'`` entries are set to zero.
        """

        assert self.test_codes is not None
        num_batches = int(math.ceil(self.perturbations.shape[0] / self.args.batch_size))

        # Settings handed to the scheduler that wraps the optimizer over theta.
        params = {
            'lr': 0.09,
            'lr_decay': 0.95,
            'lr_min': 0.0000001,
            'weight_decay': 0,
        }

        for b in range(num_batches):
            b_start = b * self.args.batch_size
            b_end = min((b + 1) * self.args.batch_size, self.perturbations.shape[0])

            batch_fonts = self.test_codes[b_start: b_end, 1]
            batch_classes = self.test_codes[b_start: b_end, 2]
            batch_code = numpy.concatenate((common.numpy.one_hot(batch_fonts, self.N_font), common.numpy.one_hot(batch_classes, self.N_class)), axis=1).astype( numpy.float32)
            batch_code = common.torch.as_variable(batch_code, self.args.use_gpu)

            batch_images = common.torch.as_variable(self.test_images[b_start: b_end], self.args.use_gpu)
            batch_images = batch_images.permute(0, 3, 1, 2)

            # The third argument presumably requests gradients -- TODO confirm
            # against common.torch.as_variable.
            batch_theta = common.torch.as_variable(self.test_theta[b_start: b_end].astype(numpy.float32), self.args.use_gpu, True)
            batch_perturbation = common.torch.as_variable(self.perturbations[b_start: b_end].astype(numpy.float32), self.args.use_gpu)

            self.model.set_code(batch_code)

            batch_theta = torch.nn.Parameter(batch_theta)
            scheduler = ADAMScheduler([batch_theta], **params)

            log('[Detection] %d: start' % b)
            for t in range(100):
                scheduler.update(t//10, float(t)/10)
                scheduler.optimizer.zero_grad()
                output_perturbation = self.model.forward(batch_theta)
                error = torch.mean(torch.mul(output_perturbation - batch_perturbation, output_perturbation - batch_perturbation))
                test_error = torch.mean(torch.mul(output_perturbation - batch_images, output_perturbation - batch_images))
                # NOTE(review): backward pass and optimizer step are disabled,
                # so batch_theta is never updated by this loop and only the
                # errors are logged -- confirm that this is intended.
                #error.backward()
                #scheduler.optimizer.step()

                log('[Detection] %d: %d = %g, %g' % (b, t, error.item(), test_error.item()))

            # Convert the output of the last iteration once, after the loop.
            output_perturbation = numpy.transpose(output_perturbation.cpu().detach().numpy(), (0, 2, 3, 1))
            # Drop singleton axes but never the batch axis; a plain squeeze()
            # would also remove it for a batch with a single sample and break
            # the concatenation with the previously collected outputs.
            kept = tuple(d for d in output_perturbation.shape[1:] if d != 1)
            output_perturbation = output_perturbation.reshape((output_perturbation.shape[0],) + kept)
            self.projected_perturbations = common.numpy.concatenate(self.projected_perturbations, output_perturbation)

        projected_perturbations = self.projected_perturbations.reshape((self.projected_perturbations.shape[0], -1))
        perturbations = self.perturbations.reshape((self.perturbations.shape[0], -1))

        success = numpy.logical_and(self.success >= 0, self.accuracy)
        log('[Detection] %d valid attacked samples' % numpy.sum(success))

        self.distances['true'] = numpy.linalg.norm(perturbations - projected_perturbations, ord=2, axis=1)
        self.angles['true'] = numpy.rad2deg(common.numpy.angles(perturbations.T, projected_perturbations.T))

        # Keep only the entries of successfully attacked samples.
        self.distances['true'] = self.distances['true'][success]
        self.angles['true'] = self.angles['true'][success]

        self.distances['test'] = numpy.zeros((numpy.sum(success)))
        self.angles['test'] = numpy.zeros((numpy.sum(success)))
    def load_data_and_model(self):
        """
        Load data and model.

        Reads the font database, test images, perturbations, attack success
        indicators, accuracy, latent codes (theta) and font/class codes from
        the HDF5 files named in ``self.args``. Afterwards the decoder is built
        on top of the database and the classifier is restored from
        ``self.args.classifier_file``; both are put into evaluation mode.

        Sets ``self.test_images``, ``self.perturbations``, ``self.success``,
        ``self.accuracy``, ``self.test_theta``, ``self.test_codes``,
        ``self.classifier`` and ``self.decoder``.

        :raises AssertionError: if the classifier state file does not exist
        """

        database = utils.read_hdf5(self.args.database_file).astype(
            numpy.float32)
        log('[Visualization] read %s' % self.args.database_file)

        # The first two database axes are fonts and classes, the third one is
        # used as the (square) image resolution of the classifier input.
        N_font = database.shape[0]
        N_class = database.shape[1]
        resolution = database.shape[2]

        # Flatten fonts x classes into a single leading axis for the decoder.
        database = database.reshape((database.shape[0] * database.shape[1],
                                     database.shape[2], database.shape[3]))
        database = torch.from_numpy(database)
        if self.args.use_gpu:
            database = database.cuda()
        # Second positional argument is requires_grad: the database is fixed.
        database = torch.autograd.Variable(database, False)

        self.test_images = utils.read_hdf5(self.args.test_images_file).astype(
            numpy.float32)
        # Images stored with fewer than four axes get a trailing axis appended
        # (presumably the channel axis of grayscale images -- TODO confirm).
        if len(self.test_images.shape) < 4:
            self.test_images = numpy.expand_dims(self.test_images, axis=3)

        self.perturbations = utils.read_hdf5(
            self.args.perturbations_file).astype(numpy.float32)
        # NOTE(review): the first two axes are swapped; presumably the file is
        # stored as (attempts, samples, ...) -- confirm against the writer.
        self.perturbations = numpy.swapaxes(self.perturbations, 0, 1)
        log('[Visualization] read %s' % self.args.perturbations_file)

        self.success = utils.read_hdf5(self.args.success_file)
        self.success = numpy.swapaxes(self.success, 0, 1)
        log('[Visualization] read %s' % self.args.success_file)

        self.accuracy = utils.read_hdf5(self.args.accuracy_file)
        # Report the file that was actually read (used to log success_file).
        log('[Visualization] read %s' % self.args.accuracy_file)

        self.test_theta = utils.read_hdf5(self.args.test_theta_file).astype(
            numpy.float32)
        # Only keep as many latent codes as there are perturbations.
        self.test_theta = self.test_theta[:self.perturbations.shape[0]]
        N_theta = self.test_theta.shape[1]
        log('[Visualization] using %d N_theta' % N_theta)
        log('[Visualization] read %s' % self.args.test_theta_file)

        # numpy.int was merely an alias of the builtin int and has been
        # removed from NumPy 1.24 on; the builtin yields the same dtype.
        self.test_codes = utils.read_hdf5(self.args.test_codes_file).astype(
            int)
        self.test_codes = self.test_codes[:self.perturbations.shape[0]]
        # Columns 1 and 2 are one-hot encoded below as font and class and
        # concatenated into a single code vector per sample.
        self.test_codes = self.test_codes[:, 1:3]
        self.test_codes = numpy.concatenate(
            (common.numpy.one_hot(self.test_codes[:, 0], N_font),
             common.numpy.one_hot(self.test_codes[:, 1], N_class)),
            axis=1).astype(numpy.float32)
        log('[Attack] read %s' % self.args.test_codes_file)

        # NOTE(review): the channel count is derived from the number of
        # transformation parameters; presumably more than 7 parameters means
        # color transformations are included -- confirm.
        image_channels = 1 if N_theta <= 7 else 3
        network_units = list(map(int, self.args.network_units.split(',')))
        log('[Visualization] using %d input channels' % image_channels)
        self.classifier = models.Classifier(
            N_class,
            resolution=(image_channels, resolution, resolution),
            architecture=self.args.network_architecture,
            activation=self.args.network_activation,
            batch_normalization=not self.args.network_no_batch_normalization,
            start_channels=self.args.network_channels,
            dropout=self.args.network_dropout,
            units=network_units)
        self.decoder = models.AlternativeOneHotDecoder(database, N_font,
                                                       N_class, N_theta)
        self.decoder.eval()

        assert os.path.exists(
            self.args.classifier_file
        ), 'state file %s not found' % self.args.classifier_file
        state = State.load(self.args.classifier_file)
        log('[Visualization] read %s' % self.args.classifier_file)

        self.classifier.load_state_dict(state.model)
        if self.args.use_gpu and not cuda.is_cuda(self.classifier):
            log('[Visualization] classifier is not CUDA')
            self.classifier = self.classifier.cuda()
        log('[Visualization] loaded classifier')

        self.classifier.eval()
        log('[Visualization] set classifier to eval')
    def train(self):
        """
        Train with fair data augmentation.

        Runs one epoch over a random permutation of the training set. For
        every batch the latent codes are randomly perturbed, decoded into
        images by ``self.decoder`` and used to update ``self.model``:

        * ``self.args.full_variant``: ``self.args.max_iterations`` update
          steps are taken, each on freshly perturbed images of the whole
          batch.
        * otherwise: one update step on the clean first half of the batch,
          followed by ``self.args.max_iterations`` steps on perturbed images
          of the remaining samples.

        With ``self.args.strong_variant`` the perturbed codes are sampled
        uniformly between ``self.min_bound`` and ``self.max_bound``; otherwise
        noise from ``common.numpy.uniform_ball`` is added to the true codes
        and the result is clamped to the bounds.

        One row per batch is appended to ``self.train_statistics`` and the
        images of the last batch are passed to ``self.debug``.
        """

        self.model.train()
        assert self.model.training is True

        batch_size = self.args.batch_size
        max_iterations = self.args.max_iterations
        use_gpu = self.args.use_gpu
        split = batch_size // 2
        num_batches = int(
            math.ceil(self.train_images.shape[0] / batch_size))
        permutation = numpy.random.permutation(self.train_images.shape[0])

        def decode_perturbed(theta, code):
            """Decode images for randomly perturbed versions of ``theta``."""
            # The sample count is taken from theta itself so that it always
            # matches the code slice handed to the decoder; a hard-coded
            # batch_size // 2 does not for odd batch sizes.
            rows, columns = theta.size(0), theta.size(1)
            if self.args.strong_variant:
                # Uniformly sample all allowed transformations.
                lower = numpy.repeat(self.min_bound.reshape(1, -1),
                                     rows,
                                     axis=0)
                upper = numpy.repeat(self.max_bound.reshape(1, -1),
                                     rows,
                                     axis=0)
                noise = numpy.random.uniform(lower, upper, (rows, columns))
                perturbed_theta = common.torch.as_variable(
                    noise.astype(numpy.float32), use_gpu)
            else:
                noise = common.numpy.uniform_ball(rows,
                                                  columns,
                                                  epsilon=self.args.epsilon,
                                                  ord=self.norm)
                perturbed_theta = theta + common.torch.as_variable(
                    noise.astype(numpy.float32), use_gpu)
                # Clamp the perturbed codes to [min_bound, max_bound].
                perturbed_theta = torch.min(
                    common.torch.as_variable(self.max_bound, use_gpu),
                    perturbed_theta)
                perturbed_theta = torch.max(
                    common.torch.as_variable(self.min_bound, use_gpu),
                    perturbed_theta)

            self.decoder.set_code(code)
            return self.decoder(perturbed_theta)

        def update_step(images, classes):
            """Take one optimizer step; return (loss, error, gradient)."""
            output_classes = self.model(images)

            self.scheduler.optimizer.zero_grad()
            step_loss = self.loss(classes, output_classes)
            step_loss.backward()
            self.scheduler.optimizer.step()

            # Mean absolute gradient of the first model parameter.
            step_gradient = torch.mean(
                torch.abs(list(self.model.parameters())[0].grad))
            step_error = self.error(classes, output_classes)
            return step_loss.item(), step_error.item(), step_gradient.item()

        for b in range(num_batches):
            self.scheduler.update(self.epoch, float(b) / num_batches)

            # mode='wrap' fills up the last batch from the beginning of the
            # permutation, so every batch holds exactly batch_size samples.
            perm = numpy.take(permutation,
                              range(b * batch_size, (b + 1) * batch_size),
                              mode='wrap')
            batch_images = common.torch.as_variable(self.train_images[perm],
                                                    use_gpu)
            batch_theta = common.torch.as_variable(self.train_theta[perm],
                                                   use_gpu)
            # Move the last axis to position 1 (channels first).
            batch_images = batch_images.permute(0, 3, 1, 2)

            batch_fonts = self.train_codes[perm, 1]
            batch_classes = self.train_codes[perm, self.args.label_index]
            batch_code = numpy.concatenate(
                (common.numpy.one_hot(batch_fonts, self.N_font),
                 common.numpy.one_hot(batch_classes, self.N_class)),
                axis=1).astype(numpy.float32)

            batch_code = common.torch.as_variable(batch_code, use_gpu)
            batch_classes = common.torch.as_variable(batch_classes, use_gpu)

            if self.args.full_variant:
                loss = error = gradient = 0
                for _ in range(max_iterations):
                    batch_perturbed_images = decode_perturbed(
                        batch_theta, batch_code)
                    step_loss, step_error, step_gradient = update_step(
                        batch_perturbed_images, batch_classes)
                    loss += step_loss
                    error += step_error
                    gradient += step_gradient

                batch_perturbations = batch_perturbed_images - batch_images
                gradient /= max_iterations
                loss /= max_iterations
                error /= max_iterations
                # All steps use perturbed images, so both pairs coincide.
                perturbation_loss = loss
                perturbation_error = error
            else:
                # One step on the clean first half of the batch ...
                loss, error, gradient = update_step(batch_images[:split],
                                                    batch_classes[:split])

                # ... then max_iterations steps on the perturbed remainder.
                perturbation_loss = perturbation_error = 0
                for _ in range(max_iterations):
                    batch_perturbed_images = decode_perturbed(
                        batch_theta[split:], batch_code[split:])
                    step_loss, step_error, step_gradient = update_step(
                        batch_perturbed_images, batch_classes[split:])
                    perturbation_loss += step_loss
                    perturbation_error += step_error
                    gradient += step_gradient

                batch_perturbations = batch_perturbed_images - batch_images[
                    split:]
                # The clean step contributes one additional gradient.
                gradient /= max_iterations + 1
                perturbation_loss /= max_iterations
                perturbation_error /= max_iterations

            iteration = self.epoch * num_batches + b + 1
            self.train_statistics = numpy.vstack((
                self.train_statistics,
                numpy.array([[
                    iteration,  # iterations
                    iteration * (1 + max_iterations) *
                    batch_size,  # samples seen
                    min(num_batches, iteration) * batch_size +
                    iteration * max_iterations *
                    batch_size,  # unique samples seen
                    loss,
                    error,
                    perturbation_loss,
                    perturbation_error,
                    gradient
                ]])))

            if b % self.args.skip == self.args.skip // 2:
                # Average the statistics over the last `skip` iterations.
                recent = self.train_statistics[
                    max(0, iteration - self.args.skip):iteration]
                log('[Training] %d | %d: %g (%g) %g (%g) [%g]' % (
                    self.epoch,
                    b,
                    numpy.mean(recent[:, 3]),
                    numpy.mean(recent[:, 4]),
                    numpy.mean(recent[:, 5]),
                    numpy.mean(recent[:, 6]),
                    numpy.mean(recent[:, -1]),
                ))

        # Visualize the images of the last batch (channels moved back last).
        self.debug('clean.%d.png' % self.epoch,
                   batch_images.permute(0, 2, 3, 1))
        self.debug('perturbed.%d.png' % self.epoch,
                   batch_perturbed_images.permute(0, 2, 3, 1))
        self.debug('perturbation.%d.png' % self.epoch,
                   batch_perturbations.permute(0, 2, 3, 1),
                   cmap='seismic')
Exemplo n.º 14
0
    def train(self):
        """
        Train adversarially.

        Runs one epoch. Every batch is attacked twice while the model is in
        evaluation mode: one part in image space (``self.setup_attack``) and
        one part in the latent space of ``self.decoder``
        (``self.setup_decoder_attack``). The model is then switched to
        training mode and updated on the resulting images:

        * ``self.args.full_variant``: first half image-space adversarial
          examples, second half decoded latent-space adversarial examples;
          the loss is the mean of both parts.
        * otherwise: first quarter latent-space, second quarter image-space
          adversarial examples, second half clean images; the loss is the
          mean of the three parts.

        Samples for the latent-space attack are drawn from a separate
        permutation which, with ``self.args.safe``, is restricted to samples
        flagged in ``self.train_valid``; without it, the main permutation is
        used.

        One row per batch is appended to ``self.train_statistics`` and the
        images of the last batch are passed to ``self.debug``.
        """

        batch_size = self.args.batch_size
        use_gpu = self.args.use_gpu
        split = batch_size // 2
        num_batches = int(
            math.ceil(self.train_images.shape[0] / batch_size))
        permutation = numpy.random.permutation(self.train_images.shape[0])
        # Always drawn, even if unused below, to keep the random number
        # stream independent of the safe flag.
        perturbation_permutation = numpy.random.permutation(
            self.train_images.shape[0])
        if self.args.safe:
            # The permutation holds sample indices, so validity has to be
            # looked up per index; masking by position would keep arbitrary
            # samples. NOTE(review): assumes self.train_valid is a per-sample
            # flag array aligned with self.train_images -- confirm.
            perturbation_permutation = perturbation_permutation[
                self.train_valid[perturbation_permutation] == 1]
        else:
            # Was assigned to a misspelled name and therefore had no effect.
            perturbation_permutation = permutation

        def attack_images(objective, images, classes):
            """Attack the model in image space."""
            attack = self.setup_attack(self.model, images, classes)
            success, perturbations, _, _, _ = attack.run(
                objective, self.args.verbose)
            batch_perturbations = common.torch.as_variable(
                perturbations.astype(numpy.float32), use_gpu)
            return (success, perturbations, batch_perturbations,
                    images + batch_perturbations)

        def attack_latent_codes(objective, theta, classes, images):
            """Attack the model through the decoder in latent space."""
            if isinstance(self.decoder, models.SelectiveDecoder):
                self.decoder.set_code(classes)
            attack = self.setup_decoder_attack(self.decoder_classifier, theta,
                                               classes)
            attack.set_bound(torch.from_numpy(self.min_bound),
                             torch.from_numpy(self.max_bound))
            decoder_success, decoder_perturbations, _, _, _ = attack.run(
                objective, self.args.verbose)

            perturbed_theta = theta + common.torch.as_variable(
                decoder_perturbations, use_gpu)
            perturbed_images = self.decoder(perturbed_theta)
            return (decoder_success, decoder_perturbations, perturbed_images,
                    perturbed_images - images)

        for b in range(num_batches):
            self.scheduler.update(self.epoch, float(b) / num_batches)

            # Attacks are computed against the model in evaluation mode.
            self.model.eval()
            assert self.model.training is False
            objective = self.objective_class()

            start = b * batch_size
            end = (b + 1) * batch_size
            if self.args.full_variant:
                # [0, split): image-space attack, [split, end): latent attack.
                perm = numpy.concatenate(
                    (numpy.take(permutation,
                                range(start, start + split),
                                mode='wrap'),
                     numpy.take(perturbation_permutation,
                                range(start + split, end),
                                mode='wrap')),
                    axis=0)
                image_part = slice(0, split)
                decoder_part = slice(split, None)
                clean_part = None
            else:
                # Latent attack first, then image-space attack, then clean.
                quarter = split // 2
                perm = numpy.concatenate(
                    (numpy.take(perturbation_permutation,
                                range(start + split + quarter, end),
                                mode='wrap'),
                     numpy.take(permutation,
                                range(start, start + split + quarter),
                                mode='wrap')),
                    axis=0)
                decoder_part = slice(0, quarter)
                image_part = slice(quarter, split)
                clean_part = slice(split, None)

            batch_images = common.torch.as_variable(self.train_images[perm],
                                                    use_gpu)
            batch_classes = common.torch.as_variable(self.train_codes[perm],
                                                     use_gpu)
            batch_theta = common.torch.as_variable(self.train_theta[perm],
                                                   use_gpu)
            # Move the last axis to position 1 (channels first).
            batch_images = batch_images.permute(0, 3, 1, 2)

            (success, perturbations, batch_perturbations1,
             batch_perturbed_images1) = attack_images(
                 objective, batch_images[image_part],
                 batch_classes[image_part])
            (decoder_success, decoder_perturbations, batch_perturbed_images2,
             batch_perturbations2) = attack_latent_codes(
                 objective, batch_theta[decoder_part],
                 batch_classes[decoder_part], batch_images[decoder_part])

            # The parts are concatenated in batch order, so the slices above
            # address the same samples in the model output.
            if clean_part is None:
                batch_input_images = torch.cat(
                    (batch_perturbed_images1, batch_perturbed_images2), dim=0)
            else:
                batch_input_images = torch.cat(
                    (batch_perturbed_images2, batch_perturbed_images1,
                     batch_images[clean_part]),
                    dim=0)

            self.model.train()
            assert self.model.training is True

            output_classes = self.model(batch_input_images)

            self.scheduler.optimizer.zero_grad()
            perturbation_loss = self.loss(batch_classes[image_part],
                                          output_classes[image_part])
            decoder_perturbation_loss = self.loss(
                batch_classes[decoder_part], output_classes[decoder_part])
            if clean_part is None:
                # No clean part: the reported loss is the training loss.
                loss = (perturbation_loss + decoder_perturbation_loss) / 2
                total_loss = loss
            else:
                # The reported loss is the loss on the clean part only.
                loss = self.loss(batch_classes[clean_part],
                                 output_classes[clean_part])
                total_loss = (loss + perturbation_loss +
                              decoder_perturbation_loss) / 3
            total_loss.backward()
            self.scheduler.optimizer.step()
            loss = loss.item()
            perturbation_loss = perturbation_loss.item()
            decoder_perturbation_loss = decoder_perturbation_loss.item()

            # Mean absolute gradient of the first model parameter.
            gradient = torch.mean(
                torch.abs(list(self.model.parameters())[0].grad))
            gradient = gradient.item()

            perturbation_error = self.error(batch_classes[image_part],
                                            output_classes[image_part])
            perturbation_error = perturbation_error.item()

            decoder_perturbation_error = self.error(
                batch_classes[decoder_part], output_classes[decoder_part])
            decoder_perturbation_error = decoder_perturbation_error.item()

            if clean_part is None:
                error = (perturbation_error + decoder_perturbation_error) / 2
            else:
                error = self.error(batch_classes[clean_part],
                                   output_classes[clean_part])
                error = error.item()

            # Attack statistics; entries of success below zero are excluded
            # from the mean and reported as -1 if none remain.
            iterations = numpy.mean(
                success[success >= 0]) if numpy.sum(success >= 0) > 0 else -1
            norm = numpy.mean(
                numpy.linalg.norm(perturbations.reshape(
                    perturbations.shape[0], -1),
                                  axis=1,
                                  ord=self.norm))
            success = numpy.sum(success >= 0) / batch_size

            decoder_iterations = numpy.mean(
                decoder_success[decoder_success >= 0]) if numpy.sum(
                    decoder_success >= 0) > 0 else -1
            decoder_norm = numpy.mean(
                numpy.linalg.norm(decoder_perturbations, axis=1,
                                  ord=self.norm))
            decoder_success = numpy.sum(decoder_success >= 0) / batch_size

            iteration = self.epoch * num_batches + b + 1
            self.train_statistics = numpy.vstack((
                self.train_statistics,
                numpy.array([[
                    iteration,  # iterations
                    iteration * (1 + self.args.max_iterations) *
                    batch_size,  # samples seen
                    min(num_batches, iteration) * batch_size +
                    iteration * self.args.max_iterations *
                    batch_size,  # unique samples seen
                    loss,
                    error,
                    perturbation_loss,
                    perturbation_error,
                    decoder_perturbation_loss,
                    decoder_perturbation_error,
                    success,
                    iterations,
                    norm,
                    decoder_success,
                    decoder_iterations,
                    decoder_norm,
                    gradient
                ]])))

            if b % self.args.skip == self.args.skip // 2:
                # Average the statistics over the last `skip` iterations.
                recent = self.train_statistics[
                    max(0, iteration - self.args.skip):iteration]
                log('[Training] %d | %d: %g (%g) %g (%g) %g (%g) [%g]' % (
                    self.epoch,
                    b,
                    numpy.mean(recent[:, 3]),
                    numpy.mean(recent[:, 4]),
                    numpy.mean(recent[:, 5]),
                    numpy.mean(recent[:, 6]),
                    numpy.mean(recent[:, 7]),
                    numpy.mean(recent[:, 8]),
                    numpy.mean(recent[:, -1]),
                ))
                log('[Training] %d | %d: %g (%g, %g) %g (%g, %g)' % (
                    self.epoch,
                    b,
                    numpy.mean(recent[:, 9]),
                    numpy.mean(recent[:, 10]),
                    numpy.mean(recent[:, 11]),
                    numpy.mean(recent[:, 12]),
                    numpy.mean(recent[:, 13]),
                    numpy.mean(recent[:, 14]),
                ))

        # Visualize the images of the last batch (channels moved back last).
        self.debug('clean.%d.png' % self.epoch,
                   batch_images.permute(0, 2, 3, 1))
        self.debug('perturbed.%d.png' % self.epoch,
                   batch_perturbed_images1.permute(0, 2, 3, 1))
        self.debug('perturbed2.%d.png' % self.epoch,
                   batch_perturbed_images2.permute(0, 2, 3, 1))
        self.debug('perturbation.%d.png' % self.epoch,
                   batch_perturbations1.permute(0, 2, 3, 1),
                   cmap='seismic')
        self.debug('perturbation2.%d.png' % self.epoch,
                   batch_perturbations2.permute(0, 2, 3, 1),
                   cmap='seismic')
    def test(self):
        """
        Test the model.
        """

        self.model.eval()
        log('[Training] %d set classifier to eval' % self.epoch)
        assert self.model.training is False

        loss = error = perturbation_loss = perturbation_error = success = iterations = norm = 0
        num_batches = int(
            math.ceil(self.args.test_samples / self.args.batch_size))

        for b in range(num_batches):
            perm = numpy.take(range(self.args.test_samples),
                              range(b * self.args.batch_size,
                                    (b + 1) * self.args.batch_size),
                              mode='clip')
            batch_images = common.torch.as_variable(self.test_images[perm],
                                                    self.args.use_gpu)
            batch_classes = common.torch.as_variable(
                self.test_codes[perm, self.args.label_index],
                self.args.use_gpu)
            batch_images = batch_images.permute(0, 3, 1, 2)

            output_classes = self.model(batch_images)
            e = self.loss(batch_classes, output_classes)
            loss += e.item()
            a = self.error(batch_classes, output_classes)
            error = a.item()

        loss /= num_batches
        error /= num_batches

        num_batches = int(
            math.ceil(self.args.attack_samples / self.args.batch_size))
        assert self.args.attack_samples > 0 and self.args.attack_samples <= self.test_images.shape[
            0]

        for b in range(num_batches):
            perm = numpy.take(range(self.args.attack_samples),
                              range(b * self.args.batch_size,
                                    (b + 1) * self.args.batch_size),
                              mode='clip')
            batch_theta = common.torch.as_variable(self.test_theta[perm],
                                                   self.args.use_gpu)

            batch_fonts = self.test_codes[perm, 1]
            batch_classes = self.test_codes[perm, self.args.label_index]
            batch_code = numpy.concatenate(
                (common.numpy.one_hot(batch_fonts, self.N_font),
                 common.numpy.one_hot(batch_classes, self.N_class)),
                axis=1).astype(numpy.float32)

            batch_code = common.torch.as_variable(batch_code,
                                                  self.args.use_gpu)
            batch_classes = common.torch.as_variable(batch_classes,
                                                     self.args.use_gpu)

            objective = self.objective_class()
            self.decoder.set_code(batch_code)
            attack = self.setup_attack(self.decoder_classifier, batch_theta,
                                       batch_classes)
            attack.set_bound(torch.from_numpy(self.min_bound),
                             torch.from_numpy(self.max_bound))
            s, p, _, _, _ = attack.run(objective, False)

            batch_perturbed_theta = batch_theta + common.torch.as_variable(
                p, self.args.use_gpu)
            batch_perturbed_images = self.decoder(batch_perturbed_theta)
            output_classes = self.model(batch_perturbed_images)

            e = self.loss(batch_classes, output_classes)
            perturbation_loss += e.item()

            e = self.error(batch_classes, output_classes)
            perturbation_error += e.item()

            iterations += numpy.mean(
                s[s >= 0]) if numpy.sum(s >= 0) > 0 else -1
            norm += numpy.mean(
                numpy.linalg.norm(p.reshape(p.shape[0], -1),
                                  axis=1,
                                  ord=self.norm))
            success += numpy.sum(s >= 0) / self.args.batch_size

        perturbation_loss /= num_batches
        perturbation_error /= num_batches
        success /= num_batches
        iterations /= num_batches
        success /= num_batches
        log('[Training] %d: test %g (%g) %g (%g)' %
            (self.epoch, loss, error, perturbation_loss, perturbation_error))
        log('[Training] %d: test %g (%g, %g)' %
            (self.epoch, success, iterations, norm))

        num_batches = int(
            math.ceil(self.train_images.shape[0] / self.args.batch_size))
        iteration = self.epoch * num_batches
        self.test_statistics = numpy.vstack((
            self.test_statistics,
            numpy.array([[
                iteration,  # iterations
                iteration * (1 + self.args.max_iterations) *
                self.args.batch_size,  # samples seen
                min(num_batches, iteration) * self.args.batch_size +
                iteration * self.args.max_iterations *
                self.args.batch_size,  # unique samples seen
                loss,
                error,
                perturbation_loss,
                perturbation_error,
                success,
                iterations,
                norm
            ]])))
    def train(self):
        """
        Train adversarially for one epoch.

        For every mini-batch an attack is run on the latent codes through
        ``self.decoder_classifier`` while the model is in evaluation mode;
        afterwards the model is switched to training mode and updated. With
        ``self.args.full_variant`` the model is trained on the perturbed
        images of the whole batch; otherwise the first half of the batch is
        kept clean, only the second half is attacked and both losses are
        averaged. One row of statistics per batch is appended to
        ``self.train_statistics`` and the images of the last batch are passed
        to ``self.debug``.
        """

        split = self.args.batch_size // 2
        num_batches = int(
            math.ceil(self.train_images.shape[0] / self.args.batch_size))
        permutation = numpy.random.permutation(self.train_images.shape[0])

        for b in range(num_batches):
            self.scheduler.update(self.epoch, float(b) / num_batches)

            # mode='wrap' lets the last batch wrap around to the beginning of
            # the permutation, so every batch holds batch_size samples.
            perm = numpy.take(permutation,
                              range(b * self.args.batch_size,
                                    (b + 1) * self.args.batch_size),
                              mode='wrap')
            batch_images = common.torch.as_variable(self.train_images[perm],
                                                    self.args.use_gpu)
            batch_theta = common.torch.as_variable(self.train_theta[perm],
                                                   self.args.use_gpu)
            batch_images = batch_images.permute(0, 3, 1, 2)

            # The decoder code is the concatenation of the one-hot font and
            # the one-hot class of every sample.
            batch_fonts = self.train_codes[perm, 1]
            batch_classes = self.train_codes[perm, self.args.label_index]
            batch_code = numpy.concatenate(
                (common.numpy.one_hot(batch_fonts, self.N_font),
                 common.numpy.one_hot(batch_classes, self.N_class)),
                axis=1).astype(numpy.float32)
            batch_code = common.torch.as_variable(batch_code,
                                                  self.args.use_gpu)
            batch_classes = common.torch.as_variable(batch_classes,
                                                     self.args.use_gpu)

            # The attack is run against the model in evaluation mode.
            self.model.eval()
            assert self.model.training is False

            if self.args.full_variant:
                # Attack the whole batch and train on perturbed images only.
                objective = self.objective_class()
                self.decoder.set_code(batch_code)
                attack = self.setup_attack(self.decoder_classifier,
                                           batch_theta, batch_classes)
                attack.set_bound(torch.from_numpy(self.min_bound),
                                 torch.from_numpy(self.max_bound))
                success, perturbations, _, _, _ = attack.run(
                    objective, self.args.verbose)

                batch_perturbed_theta = batch_theta + common.torch.as_variable(
                    perturbations, self.args.use_gpu)
                batch_perturbed_images = self.decoder(batch_perturbed_theta)
                batch_perturbations = batch_perturbed_images - batch_images

                self.model.train()
                assert self.model.training is True

                output_classes = self.model(batch_perturbed_images)

                self.scheduler.optimizer.zero_grad()
                loss = self.loss(batch_classes, output_classes)
                loss.backward()
                self.scheduler.optimizer.step()
                # No clean samples are used here, so the clean and the
                # perturbed statistics coincide.
                loss = perturbation_loss = loss.item()

                gradient = torch.mean(
                    torch.abs(list(self.model.parameters())[0].grad))
                gradient = gradient.item()

                error = self.error(batch_classes, output_classes)
                error = perturbation_error = error.item()
            else:
                # Attack the second half of the batch only; the first half
                # is used as clean training data.
                objective = self.objective_class()
                self.decoder.set_code(batch_code[split:])
                attack = self.setup_attack(self.decoder_classifier,
                                           batch_theta[split:],
                                           batch_classes[split:])
                attack.set_bound(torch.from_numpy(self.min_bound),
                                 torch.from_numpy(self.max_bound))
                success, perturbations, _, _, _ = attack.run(
                    objective, self.args.verbose)

                batch_perturbed_theta = batch_theta[
                    split:] + common.torch.as_variable(perturbations,
                                                       self.args.use_gpu)
                batch_perturbed_images = self.decoder(batch_perturbed_theta)
                batch_perturbations = batch_perturbed_images - batch_images[
                    split:]

                self.model.train()
                assert self.model.training is True

                batch_input_images = torch.cat(
                    (batch_images[:split], batch_perturbed_images), dim=0)
                output_classes = self.model(batch_input_images)

                self.scheduler.optimizer.zero_grad()
                loss = self.loss(batch_classes[:split], output_classes[:split])
                perturbation_loss = self.loss(batch_classes[split:],
                                              output_classes[split:])
                total_loss = (loss + perturbation_loss) / 2
                total_loss.backward()
                self.scheduler.optimizer.step()
                loss = loss.item()
                perturbation_loss = perturbation_loss.item()

                gradient = torch.mean(
                    torch.abs(list(self.model.parameters())[0].grad))
                gradient = gradient.item()

                error = self.error(batch_classes[:split],
                                   output_classes[:split])
                error = error.item()

                perturbation_error = self.error(batch_classes[split:],
                                                output_classes[split:])
                perturbation_error = perturbation_error.item()

            # Entries of success that are >= 0 mark successful attacks; their
            # mean is recorded as the iteration statistic (-1 if none).
            successful = success >= 0
            iterations = numpy.mean(
                success[successful]) if numpy.sum(successful) > 0 else -1
            norm = numpy.mean(
                numpy.linalg.norm(perturbations.reshape(
                    perturbations.shape[0], -1),
                                  axis=1,
                                  ord=self.norm))
            # Normalize by the number of samples that were actually attacked:
            # the whole batch for the full variant, the second part of the
            # batch otherwise. Dividing by batch_size // 2 unconditionally
            # would report rates of up to 2 for the full variant.
            success = numpy.sum(successful) / perturbations.shape[0]

            iteration = self.epoch * num_batches + b + 1
            self.train_statistics = numpy.vstack((
                self.train_statistics,
                numpy.array([[
                    iteration,  # iterations
                    iteration * (1 + self.args.max_iterations) *
                    self.args.batch_size,  # samples seen
                    min(num_batches, iteration) * self.args.batch_size +
                    iteration * self.args.max_iterations *
                    self.args.batch_size,  # unique samples seen
                    loss,
                    error,
                    perturbation_loss,
                    perturbation_error,
                    success,
                    iterations,
                    norm,
                    gradient
                ]])))

            if b % self.args.skip == self.args.skip // 2:
                # Log averages over the most recent self.args.skip rows.
                recent = self.train_statistics[
                    max(0, iteration - self.args.skip):iteration]
                log('[Training] %d | %d: %g (%g) %g (%g) [%g]' % (
                    self.epoch,
                    b,
                    numpy.mean(recent[:, 3]),
                    numpy.mean(recent[:, 4]),
                    numpy.mean(recent[:, 5]),
                    numpy.mean(recent[:, 6]),
                    numpy.mean(recent[:, -1]),
                ))
                log('[Training] %d | %d: %g (%g, %g)' % (
                    self.epoch,
                    b,
                    numpy.mean(recent[:, 7]),
                    numpy.mean(recent[:, 8]),
                    numpy.mean(recent[:, 9]),
                ))

        # Visualize the last batch of the epoch.
        self.debug('clean.%d.png' % self.epoch,
                   batch_images.permute(0, 2, 3, 1))
        self.debug('perturbed.%d.png' % self.epoch,
                   batch_perturbed_images.permute(0, 2, 3, 1))
        self.debug('perturbation.%d.png' % self.epoch,
                   batch_perturbations.permute(0, 2, 3, 1),
                   cmap='seismic')
    def test(self):
        """
        Evaluate the model on clean and on randomly perturbed test samples.

        The first ``self.args.test_samples`` test samples are processed in
        batches. For every batch the loss and error on the clean images are
        accumulated, then the latent codes are perturbed randomly, decoded
        and classified again. The batch averages are logged and appended as
        one row to ``self.test_statistics``.
        """

        self.model.eval()
        log('[Training] %d set classifier to eval' % self.epoch)

        batch_size = self.args.batch_size
        use_gpu = self.args.use_gpu

        clean_loss = 0
        clean_error = 0
        perturbed_loss = 0
        perturbed_error = 0
        test_batches = int(math.ceil(self.args.test_samples / batch_size))
        assert self.model.training is False

        for b in range(test_batches):
            # Indices beyond the last test sample are clipped, i.e. the final
            # batch is filled up by repeating the last sample.
            indices = numpy.take(range(self.args.test_samples),
                                 range(b * batch_size, (b + 1) * batch_size),
                                 mode='clip')
            images = common.torch.as_variable(self.test_images[indices],
                                              use_gpu)
            theta = common.torch.as_variable(self.test_theta[indices],
                                             use_gpu)
            images = images.permute(0, 3, 1, 2)

            # Decoder code: one-hot font followed by one-hot class.
            fonts = self.test_codes[indices, 1]
            labels = self.test_codes[indices, self.args.label_index]
            one_hot_fonts = common.numpy.one_hot(fonts, self.N_font)
            one_hot_labels = common.numpy.one_hot(labels, self.N_class)
            code = numpy.concatenate((one_hot_fonts, one_hot_labels), axis=1)
            code = common.torch.as_variable(code.astype(numpy.float32),
                                            use_gpu)
            labels = common.torch.as_variable(labels, use_gpu)

            # Clean performance.
            predictions = self.model(images)
            clean_loss += self.loss(labels, predictions).item()
            clean_error += self.error(labels, predictions).item()

            num_samples = theta.size(0)
            num_latents = theta.size(1)
            if self.args.strong_variant:
                # Draw completely new latent codes uniformly between the
                # bounds; the original codes are not used.
                lower = numpy.repeat(self.min_bound.reshape(1, -1),
                                     num_samples,
                                     axis=0)
                upper = numpy.repeat(self.max_bound.reshape(1, -1),
                                     num_samples,
                                     axis=0)
                noise = numpy.random.uniform(lower, upper,
                                             (num_samples, num_latents))
                perturbed_theta = common.torch.as_variable(
                    noise.astype(numpy.float32), use_gpu)
            else:
                # Add noise drawn by common.numpy.uniform_ball to the
                # original codes and clamp the result to the bounds.
                noise = common.numpy.uniform_ball(num_samples,
                                                  num_latents,
                                                  epsilon=self.args.epsilon,
                                                  ord=self.norm)
                perturbed_theta = theta + common.torch.as_variable(
                    noise.astype(numpy.float32), use_gpu)
                upper = common.torch.as_variable(self.max_bound, use_gpu)
                perturbed_theta = torch.min(upper, perturbed_theta)
                lower = common.torch.as_variable(self.min_bound, use_gpu)
                perturbed_theta = torch.max(lower, perturbed_theta)

            # Performance on the decoded, perturbed latent codes.
            self.decoder.set_code(code)
            perturbed_images = self.decoder(perturbed_theta)
            predictions = self.model(perturbed_images)
            perturbed_loss += self.loss(labels, predictions).item()
            perturbed_error += self.error(labels, predictions).item()

        clean_loss /= test_batches
        clean_error /= test_batches
        perturbed_loss /= test_batches
        perturbed_error /= test_batches
        log('[Training] %d: test %g (%g) %g (%g)' %
            (self.epoch, clean_loss, clean_error, perturbed_loss,
             perturbed_error))

        # The bookkeeping columns are expressed in training iterations, hence
        # the number of training batches is needed here.
        train_batches = int(
            math.ceil(self.train_images.shape[0] / batch_size))
        iteration = self.epoch * train_batches
        samples_seen = iteration * (
            1 + self.args.max_iterations) * self.args.batch_size
        unique_samples_seen = (
            min(train_batches, iteration) * self.args.batch_size +
            iteration * self.args.max_iterations * self.args.batch_size)
        row = numpy.array([[
            iteration,
            samples_seen,
            unique_samples_seen,
            clean_loss,
            clean_error,
            perturbed_loss,
            perturbed_error
        ]])
        self.test_statistics = numpy.vstack((self.test_statistics, row))
    def train(self, epoch):
        """
        Train for one epoch.

        Every batch is encoded, reparameterized and decoded, and the
        classifier is applied to both the real and the reconstructed images.
        Encoder, decoder and classifier are then updated one after another,
        each with its own optimizer and loss. One row of statistics per batch
        is appended to ``self.train_statistics`` and a summary over the most
        recent rows is logged every ten batches.

        :param epoch: current epoch
        :type epoch: int
        """

        self.encoder.train()
        log('[Training] %d set encoder to train' % epoch)
        self.decoder.train()
        log('[Training] %d set decoder to train' % epoch)
        self.classifier.train()
        log('[Training] %d set classifier to train' % epoch)

        num_batches = int(
            math.ceil(self.train_images.shape[0] / self.args.batch_size))
        assert self.encoder.training is True

        # The first batch_size indices are appended once more so that the
        # last batch is always full.
        permutation = numpy.random.permutation(self.train_images.shape[0])
        permutation = numpy.concatenate(
            (permutation, permutation[:self.args.batch_size]), axis=0)

        # Logging interval and size of the averaging window, in batches.
        skip = 10

        for b in range(num_batches):
            self.encoder_scheduler.update(epoch, float(b) / num_batches)
            self.decoder_scheduler.update(epoch, float(b) / num_batches)
            self.classifier_scheduler.update(epoch, float(b) / num_batches)

            perm = permutation[b * self.args.batch_size:(b + 1) *
                               self.args.batch_size]
            batch_images = common.torch.as_variable(self.train_images[perm],
                                                    self.args.use_gpu, True)
            batch_images = batch_images.permute(0, 3, 1, 2)

            output_mu, output_logvar = self.encoder(batch_images)
            output_codes = self.reparameterize(output_mu, output_logvar)
            output_images = self.decoder(output_codes)

            output_real_classes = self.classifier(batch_images)
            output_reconstructed_classes = self.classifier(output_images)

            latent_loss = self.latent_loss(output_mu, output_logvar)
            reconstruction_loss = self.reconstruction_loss(
                batch_images, output_images)
            decoder_loss = self.decoder_loss(output_reconstructed_classes)
            discriminator_loss = self.discriminator_loss(
                output_real_classes, output_reconstructed_classes)

            # NOTE(review): the three updates below share one graph and only
            # the optimizer that is about to step is zeroed beforehand; the
            # order of these statements must not be changed.

            # Encoder update; the graph is retained for the next two passes.
            self.encoder_scheduler.optimizer.zero_grad()
            loss = latent_loss + self.args.beta * reconstruction_loss + self.args.gamma * decoder_loss + self.args.eta * torch.sum(
                torch.abs(output_logvar))
            loss.backward(retain_graph=True)
            self.encoder_scheduler.optimizer.step()

            # Decoder update.
            self.decoder_scheduler.optimizer.zero_grad()
            loss = self.args.beta * reconstruction_loss + self.args.gamma * decoder_loss
            loss.backward(retain_graph=True)
            self.decoder_scheduler.optimizer.step()

            # Classifier update.
            self.classifier_scheduler.optimizer.zero_grad()
            loss = self.args.gamma * discriminator_loss
            loss.backward()
            self.classifier_scheduler.optimizer.step()

            reconstruction_error = self.reconstruction_error(
                batch_images, output_images)
            iteration = epoch * num_batches + b + 1
            # Every entry is converted to a plain Python number with .item();
            # raw tensors inside the list cannot be turned into a numpy array
            # when they live on the GPU.
            self.train_statistics = numpy.vstack(
                (self.train_statistics,
                 numpy.array([
                     iteration,
                     iteration * self.args.batch_size,
                     min(num_batches, iteration),
                     min(num_batches, iteration) * self.args.batch_size,
                     reconstruction_loss.item(),
                     reconstruction_error.item(),
                     latent_loss.item(),
                     torch.mean(output_mu).item(),
                     torch.var(output_mu).item(),
                     torch.mean(output_logvar).item(),
                     decoder_loss.item(),
                     discriminator_loss.item(),
                     # Mean absolute gradient of the first parameter tensor
                     # of each network.
                     torch.mean(
                         torch.abs(list(
                             self.encoder.parameters())[0].grad)).item(),
                     torch.mean(
                         torch.abs(list(
                             self.decoder.parameters())[0].grad)).item(),
                     torch.mean(
                         torch.abs(list(
                             self.classifier.parameters())[0].grad)).item()
                 ])))

            if b % skip == skip // 2:
                # Averages over the most recent `skip` rows, column by column.
                recent = self.train_statistics[max(0, iteration -
                                                   skip):iteration]
                log('[Training] %d | %d: %g (%g) %g (%g, %g, %g)' %
                    ((epoch, b) + tuple(
                        numpy.mean(recent[:, column])
                        for column in range(4, 10))))
                log('[Training] %d | %d: %g %g (%g, %g, %g)' %
                    ((epoch, b) + tuple(
                        numpy.mean(recent[:, column])
                        for column in range(10, 15))))
Exemplo n.º 19
0
    def attack(self):
        """
        Attack the test images and store the resulting adversarial examples.

        If both the perturbations file and the success file already exist,
        their contents are loaded first: when they already cover the requested
        attempts and samples nothing is done, otherwise only the missing
        samples or the missing attempts are computed and concatenated to the
        loaded results. The results are written back to both files.
        """

        assert self.model is not None
        assert self.model.training is False
        assert self.test_images.shape[0] == self.test_codes.shape[0], 'number of samples has to match'

        concatenate_axis = -1
        files_exist = os.path.exists(self.args.perturbations_file) and os.path.exists(self.args.success_file)
        if files_exist:
            self.original_perturbations = utils.read_hdf5(self.args.perturbations_file)
            # Multi-channel images keep their channel axis, single-channel
            # images are stored without it.
            expected_rank = 5 if self.test_images.shape[3] > 1 else 4
            assert len(self.original_perturbations.shape) == expected_rank
            log('[Attack] read %s' % self.args.perturbations_file)

            self.original_success = utils.read_hdf5(self.args.success_file)
            log('[Attack] read %s' % self.args.success_file)

            found_attempts, found_samples = self.original_perturbations.shape[:2]
            assert found_attempts == self.original_success.shape[0]
            assert found_samples == self.original_success.shape[1]
            assert self.original_perturbations.shape[2] == self.test_images.shape[1]
            assert self.original_perturbations.shape[3] == self.test_images.shape[2]

            if found_samples >= self.args.max_samples and found_attempts >= self.args.max_attempts:
                log('[Attack] found %d attempts, %d samples, requested no more' % (found_attempts, found_samples))
                return

            if found_attempts == self.args.max_attempts:
                # Same number of attempts: attack only the samples that are
                # still missing and append them along the sample axis.
                self.test_images = self.test_images[found_samples:]
                self.test_codes = self.test_codes[found_samples:]
                self.args.max_samples = self.args.max_samples - found_samples
                concatenate_axis = 1
                log('[Attack] found %d attempts with %d perturbations, computing %d more perturbations' % (found_attempts, found_samples, self.args.max_samples))
            elif found_samples == self.args.max_samples:
                # Same number of samples: run only the missing attempts and
                # append them along the attempt axis.
                self.args.max_attempts = self.args.max_attempts - found_attempts
                concatenate_axis = 0
                log('[Attack] found %d attempts with %d perturbations, computing %d more attempts' % (found_attempts, found_samples, self.args.max_attempts))

        # can't squeeze here!
        result_shape = (self.args.max_attempts, self.args.max_samples, self.test_images.shape[1], self.test_images.shape[2])
        if self.test_images.shape[3] > 1:
            result_shape = result_shape + (self.test_images.shape[3],)
        self.perturbations = numpy.zeros(result_shape)
        # -1 marks attempts without a successful attack.
        self.success = numpy.ones((self.args.max_attempts, self.args.max_samples), dtype=int) * -1

        # Only attacks with 'Batch' in their name process several samples at
        # once.
        batch_size = 1
        if 'Batch' in self.args.attack:
            batch_size = min(self.args.batch_size, self.args.max_samples)

        objective = self.objective_class()
        num_batches = int(math.ceil(self.args.max_samples/batch_size))

        for i in range(num_batches):
            i_start = i*batch_size
            if i_start == self.args.max_samples:
                break
            i_end = min(i_start + batch_size, self.args.max_samples)
            samples = slice(i_start, i_end)

            batch_images = common.torch.as_variable(self.test_images[samples], self.args.use_gpu)
            batch_classes = common.torch.as_variable(numpy.array(self.test_codes[samples]), self.args.use_gpu)
            batch_images = batch_images.permute(0, 3, 1, 2)

            for t in range(self.args.max_attempts):
                attack = self.setup_attack(batch_images, batch_classes)
                success, perturbations, _, _, _ = attack.run(objective)
                # x != x holds only for NaN entries.
                assert not numpy.any(perturbations != perturbations), perturbations

                # Note that we save the perturbed image, not only the perturbation!
                adversarial_images = perturbations + batch_images.cpu().numpy()
                adversarial_images = numpy.transpose(adversarial_images, (0, 2, 3, 1))
                self.perturbations[t][samples] = numpy.squeeze(adversarial_images)
                self.success[t][samples] = success

                # IMPORTANT: The adversarial examples are not considering whether the classifier is
                # actually correct to start with.

            log('[Attack] %d: completed' % i)

        if concatenate_axis >= 0 and self.perturbations.shape[0] == self.args.max_attempts:
            self.perturbations = numpy.concatenate((self.original_perturbations, self.perturbations), axis=concatenate_axis)
            self.success = numpy.concatenate((self.original_success, self.success), axis=concatenate_axis)
            log('[Attack] concatenated')

        utils.write_hdf5(self.args.perturbations_file, self.perturbations)
        log('[Attack] wrote %s' % self.args.perturbations_file)
        utils.write_hdf5(self.args.success_file, self.success)
        log('[Attack] wrote %s' % self.args.success_file)
Exemplo n.º 20
0
    def compute_statistics(self):
        """
        Compute success and confidence statistics of the attack.

        Fills ``self.results`` with the following entries:

        - ``raw_success``: fraction of (sample, attempt) pairs with a
          successful attack, i.e. a success entry >= 0;
        - ``raw_iteration``: mean success entry of the successful attacks;
        - ``raw_roc``: ROC AUC separating the confidences derived from
          ``self.test_probabilities`` (label 1) from those derived from
          ``self.probabilities`` (label 0);
        - ``raw_confidence``: mean confidence of the successful attacks;
        - ``raw_confidence_weighted_success``: summed confidence of the
          successful attacks divided by the summed test confidences;
        - ``raw_ratios``: mean confidence ratio of the successful attacks.

        If no attack was successful, all entries are set to 0. The results
        are pickled to ``self.args.results_file`` if it is set, and plots are
        written if ``self.args.plot_directory`` is set and a display is
        available.
        """

        num_attempts = self.perturbations.shape[0]

        # Flatten (attempts, samples) with the sample index varying slowest,
        # so that all attempts of one sample are consecutive.
        success = numpy.swapaxes(self.success, 0, 1)
        success = success.reshape((success.shape[0] * success.shape[1]))

        probabilities = numpy.swapaxes(self.probabilities, 0, 1)
        probabilities = probabilities.reshape(
            (probabilities.shape[0] * probabilities.shape[1], -1))
        # Confidence = largest probability of each row.
        confidences = numpy.max(probabilities, 1)

        # Test probabilities of the attacked samples, repeated once per
        # attempt to line up with the flattened arrays above.
        perturbation_probabilities = self.test_probabilities[:self.success.
                                                             shape[1]]
        perturbation_probabilities = numpy.repeat(perturbation_probabilities,
                                                  num_attempts,
                                                  axis=0)
        perturbation_confidences = numpy.max(perturbation_probabilities, 1)

        probability_ratios = confidences / perturbation_confidences

        raw_overall_success = success >= 0
        log('[Testing] %d valid attacks' % numpy.sum(raw_overall_success))

        # For off-manifold attacks this should not happen, but better safe
        # than sorry.
        if not numpy.any(raw_overall_success):
            for key in [
                    'raw_success', 'raw_iteration', 'raw_roc',
                    'raw_confidence_weighted_success', 'raw_confidence',
                    'raw_ratios'
            ]:
                self.results[key] = 0
            if self.args.results_file:
                utils.write_pickle(self.args.results_file, self.results)
                log('[Testing] wrote %s' % self.args.results_file)
            log('[Testing] no successful attacks found, no plots')
            return

        if self.args.plot_directory and utils.display():
            # Histogram of the success entries of the successful attacks.
            iterations = success[raw_overall_success]
            x = numpy.arange(numpy.max(iterations) + 1)
            y = numpy.bincount(iterations)
            plot_file = os.path.join(self.args.plot_directory, 'iterations')
            plot.bar(plot_file,
                     x,
                     y,
                     title='Distribution of Iterations of Successful Attacks',
                     xlabel='Number of Iterations',
                     ylabel='Count')
            log('[Testing] wrote %s' % plot_file)

            plot_file = os.path.join(self.args.plot_directory, 'probabilities')
            plot.histogram(plot_file, confidences[raw_overall_success], 50)
            log('[Testing] wrote %s' % plot_file)

            plot_file = os.path.join(self.args.plot_directory,
                                     'probability_ratios')
            plot.histogram(plot_file, probability_ratios, 50)
            log('[Testing] wrote %s' % plot_file)

            # Test probability assigned to the entry selected by
            # self.test_codes for every test sample.
            plot_file = os.path.join(self.args.plot_directory,
                                     'test_probabilities')
            plot.histogram(
                plot_file, self.test_probabilities[
                    numpy.arange(self.test_probabilities.shape[0]),
                    self.test_codes], 50)
            log('[Testing] wrote %s' % plot_file)

        y_true = numpy.concatenate(
            (numpy.zeros(confidences.shape[0]),
             numpy.ones(perturbation_confidences.shape[0])))
        y_score = numpy.concatenate((confidences, perturbation_confidences))
        roc_auc_score = sklearn.metrics.roc_auc_score(y_true, y_score)

        self.results['raw_roc'] = roc_auc_score
        self.results['raw_confidence_weighted_success'] = numpy.sum(
            confidences[raw_overall_success]) / numpy.sum(
                perturbation_confidences)
        # Average the per-attack confidences (row maxima). Averaging the full
        # probability rows instead would yield 1 / number of classes for
        # normalized probabilities, independent of the attack.
        self.results['raw_confidence'] = numpy.mean(
            confidences[raw_overall_success])
        self.results['raw_ratios'] = numpy.mean(
            probability_ratios[raw_overall_success])
        self.results['raw_success'] = numpy.sum(
            raw_overall_success) / success.shape[0]
        self.results['raw_iteration'] = numpy.average(
            success[raw_overall_success])

        if self.args.results_file:
            utils.write_pickle(self.args.results_file, self.results)
            log('[Testing] wrote %s' % self.args.results_file)
Exemplo n.º 21
0
    def attack(self):
        """
        Run the configured attack on the test samples and write the results.

        For every batch of test samples, the attack is run
        ``self.args.max_attempts`` times. The perturbed parameters are stored
        in ``self.perturbations`` with shape (attempts, samples, N_theta), and
        the values returned by the attack as ``success`` are stored in
        ``self.success`` with shape (attempts, samples), initialised to -1.

        If both ``self.args.perturbations_file`` and ``self.args.success_file``
        already exist, their contents are reused:

        - if they hold at least as many attempts and samples as requested,
          nothing is computed and the method returns early;
        - if the number of attempts matches, only the missing samples are
          computed and appended along the sample axis;
        - if the number of samples matches, only the missing attempts are
          computed and appended along the attempt axis;
        - otherwise everything is recomputed and the files are overwritten.

        Side effects: when resuming, ``self.args.max_samples`` or
        ``self.args.max_attempts`` is reduced to the number of items still to
        compute, and ``self.test_images``/``self.test_codes`` may be sliced.
        Both result files are (re)written at the end.
        """

        assert self.model is not None
        assert self.model.classifier.training is False

        # Axis along which new results get appended to stored results;
        # a negative value means nothing is reused.
        concatenate_axis = -1
        if os.path.exists(self.args.perturbations_file) and os.path.exists(
                self.args.success_file):
            self.original_perturbations = utils.read_hdf5(
                self.args.perturbations_file)
            assert len(self.original_perturbations.shape) == 3
            log('[Attack] read %s' % self.args.perturbations_file)

            self.original_success = utils.read_hdf5(self.args.success_file)
            log('[Attack] read %s' % self.args.success_file)

            found_attempts = self.original_perturbations.shape[0]
            found_samples = self.original_perturbations.shape[1]
            assert found_attempts == self.original_success.shape[0]
            assert found_samples == self.original_success.shape[1]

            # Nothing to do only if the stored results already cover the
            # request. (The comparison used to be `<=`, which skipped the
            # attack exactly when results were missing and produced negative
            # remaining counts in the resume branches below.)
            if (found_samples >= self.args.max_samples
                    and found_attempts >= self.args.max_attempts):
                log('[Attack] found %d attempts, %d samples, requested no more'
                    % (found_attempts, found_samples))
                return
            elif found_attempts == self.args.max_attempts:
                # Same number of attempts, fewer samples: skip the samples
                # that were already attacked and compute only the rest.
                self.test_images = self.test_images[found_samples:]
                self.test_codes = self.test_codes[found_samples:]
                self.args.max_samples = self.args.max_samples - found_samples
                concatenate_axis = 1
                log('[Attack] found %d attempts with %d perturbations, computing %d more perturbations'
                    % (found_attempts, found_samples, self.args.max_samples))
            elif found_samples == self.args.max_samples:
                # Same number of samples, fewer attempts: compute only the
                # missing attempts on all samples.
                self.args.max_attempts = self.args.max_attempts - found_attempts
                concatenate_axis = 0
                log('[Attack] found %d attempts with %d perturbations, computing %d more attempts'
                    % (found_attempts, found_samples, self.args.max_attempts))

        self.perturbations = numpy.zeros(
            (self.args.max_attempts, self.args.max_samples, self.args.N_theta))
        self.success = numpy.ones(
            (self.args.max_attempts, self.args.max_samples), dtype=int) * -1

        # Only attacks whose name contains 'Batch' process several samples at
        # once; all others are run one sample at a time.
        if self.args.attack.find('Batch') >= 0:
            batch_size = min(self.args.batch_size, self.args.max_samples)
        else:
            batch_size = 1

        objective = self.objective_class()
        num_batches = int(
            math.ceil(float(self.args.max_samples) / batch_size))

        for i in range(num_batches):
            i_start = i * batch_size
            i_end = min((i + 1) * batch_size, self.args.max_samples)

            batch_classes = common.torch.as_variable(
                self.test_codes[i_start:i_end], self.args.use_gpu)
            batch_theta = common.torch.as_variable(
                numpy.zeros((i_end - i_start, self.args.N_theta),
                            dtype=numpy.float32), self.args.use_gpu)
            if self.args.N_theta > 4:
                # NOTE(review): presumably index 4 is a parameter whose
                # neutral value is 1 rather than 0 -- confirm with the decoder.
                batch_theta[:, 4] = 1
            batch_images = common.torch.as_variable(
                self.test_images[i_start:i_end], self.args.use_gpu)
            # Move the last axis to position 1 (assumes images are stored
            # channels-last -- TODO confirm).
            batch_images = batch_images.permute(0, 3, 1, 2)

            self.model.decoder.set_image(batch_images)

            for t in range(self.args.max_attempts):
                attack = self.setup_attack(batch_theta, batch_classes)
                success, perturbations, probabilities, norm, _ = attack.run(
                    objective)
                # NaN is the only value that differs from itself, so this
                # fails as soon as the attack produced a NaN perturbation.
                assert not numpy.any(
                    perturbations != perturbations), perturbations

                # hack for when only one dimensional latent space is used!
                perturbations = perturbations.reshape(batch_theta.size())
                # Note that the perturbed parameters (initial theta plus
                # perturbation) are stored, not only the perturbation!
                self.perturbations[t][i_start:i_end] = (
                    perturbations + batch_theta.cpu().detach().numpy())
                self.success[t][i_start:i_end] = success

            log('[Attack] %d: completed' % i)

        if concatenate_axis >= 0:
            # Append the newly computed results to the stored ones.
            self.perturbations = numpy.concatenate(
                (self.original_perturbations, self.perturbations),
                axis=concatenate_axis)
            self.success = numpy.concatenate(
                (self.original_success, self.success),
                axis=concatenate_axis)
            log('[Attack] concatenated')

        utils.write_hdf5(self.args.perturbations_file, self.perturbations)
        log('[Attack] wrote %s' % self.args.perturbations_file)
        utils.write_hdf5(self.args.success_file, self.success)
        log('[Attack] wrote %s' % self.args.success_file)