Example #1
    def test(self):
        """
        Test classifier to identify valid samples to attack.
        """

        num_batches = int(math.ceil(self.test_images.shape[0] / self.args.batch_size))
        self.model.eval()
        assert self.model.training is False
        assert self.test_images.shape[0] == self.test_codes.shape[0], 'number of samples have to match'

        for b in range(num_batches):
            b_start = b * self.args.batch_size
            b_end = min((b + 1) * self.args.batch_size, self.test_images.shape[0])
            batch_images = common.torch.as_variable(self.test_images[b_start: b_end], self.args.use_gpu)
            batch_classes = common.torch.as_variable(self.test_codes[b_start: b_end], self.args.use_gpu)
            batch_images = batch_images.permute(0, 3, 1, 2)

            output_classes = self.model(batch_images)
            values, indices = torch.max(torch.nn.functional.softmax(output_classes, dim=1), dim=1)
            errors = torch.abs(indices - batch_classes)

            self.accuracy = common.numpy.concatenate(self.accuracy, errors.data.cpu().numpy())

            if b % 100 == 0:
                log('[Attack] computing accuracy %d' % b)

        self.accuracy = self.accuracy == 0
        utils.write_hdf5(self.args.accuracy_file, self.accuracy)
        log('[Attack] wrote %s' % self.args.accuracy_file)

        accuracy = numpy.sum(self.accuracy)/float(self.accuracy.shape[0])
        log('[Attack] accuracy %g' % accuracy)
        accuracy = numpy.sum(self.accuracy[:self.args.max_samples]) / float(self.args.max_samples)
        log('[Attack] accuracy on %d samples %g' % (self.args.max_samples, accuracy))
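These examples rely on two small helpers that the listing never shows: common.torch.as_variable, which wraps a numpy array as a torch variable (optionally on the GPU), and common.numpy.concatenate, which tolerates a None accumulator so per-batch results can be collected incrementally. Minimal sketches consistent with the call sites above (names and signatures are inferred from usage, not taken from the actual library):

import numpy
import torch

def as_variable(array, use_gpu=False):
    # Sketch: numpy array -> torch variable, moved to the GPU on request.
    tensor = torch.from_numpy(array)
    if use_gpu:
        tensor = tensor.cuda()
    return torch.autograd.Variable(tensor)

def concatenate(array, batch, axis=0):
    # Sketch: numpy.concatenate that treats a None accumulator as empty,
    # matching the `acc = concatenate(acc, batch)` pattern used throughout.
    if array is None:
        return batch
    return numpy.concatenate((array, batch), axis=axis)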
Example #2
    def test_random(self):
        """
        Test random.
        """

        pred_images = None
        codes = numpy.random.normal(
            0, 1, (1000, self.args.latent_space_size)).astype(numpy.float32)
        num_batches = int(math.ceil(codes.shape[0] / self.args.batch_size))

        for b in range(num_batches):
            b_start = b * self.args.batch_size
            # Batching is over the sampled codes here, not the test images.
            b_end = min((b + 1) * self.args.batch_size, codes.shape[0])
            batch_codes = common.torch.as_variable(codes[b_start:b_end],
                                                   self.args.use_gpu)

            # To get the correct images!
            output_images = self.decoder(batch_codes)

            output_images = numpy.squeeze(
                numpy.transpose(output_images.cpu().detach().numpy(),
                                (0, 2, 3, 1)))
            pred_images = common.numpy.concatenate(pred_images, output_images)

            if b % 100 == 50:
                log('[Testing] %d' % b)

        utils.write_hdf5(self.args.random_file, pred_images)
        log('[Testing] wrote %s' % self.args.random_file)
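Every example persists its arrays with utils.write_hdf5, which is likewise not part of the listing. A minimal h5py-based sketch matching how it is called here, assuming a default dataset key of 'tensor' (Example #15 below passes keys='tensor' explicitly):

import h5py

def write_hdf5(filepath, tensor, key='tensor'):
    # Sketch: store a single numpy array under the given dataset key.
    with h5py.File(filepath, 'w') as f:
        f.create_dataset(key, data=tensor)

def read_hdf5(filepath, key='tensor'):
    # Sketch: read the array back.
    with h5py.File(filepath, 'r') as f:
        return f[key][()]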
Example #3
    def main(self):
        """
        Main method.
        """

        database = utils.read_hdf5(self.args.database_file)
        log('[Data] read %s' % self.args.database_file)

        N_font = database.shape[0]
        N_class = database.shape[1]

        assert database.shape[2] == database.shape[3]
        database = database.reshape((database.shape[0] * database.shape[1],
                                     database.shape[2], database.shape[3]))
        database = torch.from_numpy(database).float()
        if self.args.use_gpu:
            database = database.cuda()

        database = torch.autograd.Variable(database)

        codes = utils.read_hdf5(self.args.codes_file)
        codes = codes[:, 0]
        codes = common.numpy.one_hot(codes, N_font * N_class)
        log('[Data] read %s' % self.args.codes_file)

        theta = utils.read_hdf5(self.args.theta_file)
        N = theta.shape[0]
        N_theta = theta.shape[1]
        log('[Data] read %s' % self.args.theta_file)

        model = models.OneHotDecoder(database, N_theta)
        images = []

        num_batches = int(math.ceil(float(N) / self.args.batch_size))
        for b in range(num_batches):
            batch_theta = torch.from_numpy(
                theta[b * self.args.batch_size:min((b + 1) *
                                                   self.args.batch_size, N)])
            batch_codes = torch.from_numpy(
                codes[b * self.args.batch_size:min((b + 1) *
                                                   self.args.batch_size, N)])
            batch_codes, batch_theta = batch_codes.float(), batch_theta.float()

            if self.args.use_gpu:
                batch_codes, batch_theta = batch_codes.cuda(
                ), batch_theta.cuda()

            batch_codes, batch_theta = torch.autograd.Variable(
                batch_codes), torch.autograd.Variable(batch_theta)
            output = model(batch_codes, batch_theta)

            images.append(output.data.cpu().numpy().squeeze())
            if b % 1000 == 0:
                log('[Data] processed %d/%d batches' % (b + 1, num_batches))

        images = numpy.concatenate(images, axis=0)
        if len(images.shape) > 3:
            images = numpy.transpose(images, (0, 2, 3, 1))
        utils.write_hdf5(self.args.images_file, images)
        log('[Data] wrote %s' % self.args.images_file)
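Example #3 encodes font/class indices via common.numpy.one_hot(codes, N_font * N_class). A plausible sketch of that helper, with the signature inferred from the call sites:

import numpy

def one_hot(indices, num_classes):
    # Sketch: turn a vector of integer indices into one-hot rows.
    indices = numpy.asarray(indices, dtype=int).reshape(-1)
    encoded = numpy.zeros((indices.shape[0], num_classes), dtype=numpy.float32)
    encoded[numpy.arange(indices.shape[0]), indices] = 1
    return encoded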
Example #4
    def main(self):
        """
        Main.
        """

        train_images_file = paths.celeba_train_images_file()
        test_images_file = paths.celeba_test_images_file()

        assert os.path.exists(train_images_file)
        assert os.path.exists(test_images_file)

        train_images = utils.read_hdf5(train_images_file)
        log('read %s' % train_images_file)

        test_images = utils.read_hdf5(test_images_file)
        log('read %s' % test_images_file)

        log('[Data] before train: %g %g' %
            (numpy.min(train_images), numpy.max(train_images)))
        log('[Data] before test: %g %g' %
            (numpy.min(test_images), numpy.max(test_images)))

        train_images *= 255
        test_images *= 255

        log('[Data] after train: %g %g' %
            (numpy.min(train_images), numpy.max(train_images)))
        log('[Data] after test: %g %g' %
            (numpy.min(test_images), numpy.max(test_images)))

        utils.write_hdf5(train_images_file, train_images.astype(numpy.float32))
        log('[Data] wrote %s' % train_images_file)
        utils.write_hdf5(test_images_file, test_images.astype(numpy.float32))
        log('[Data] wrote %s' % test_images_file)
Example #5
def convert_dataset():
    """
    Convert MNIST.
    """

    filenames = [
        [paths.raw_mnist_train_images_file(),
         paths.mnist_train_images_file()],
        [paths.raw_mnist_test_images_file(),
         paths.mnist_test_images_file()],
        [paths.raw_mnist_train_labels_file(),
         paths.mnist_train_labels_file()],
        [paths.raw_mnist_test_labels_file(),
         paths.mnist_test_labels_file()]
    ]
    for names in filenames[:2]:
        with gzip.open(names[0], 'rb') as f:
            data = numpy.frombuffer(f.read(), numpy.uint8,
                                    offset=16).reshape(-1, 28, 28, 1)
            #data = data.swapaxes(1, 2)
            data = data.astype(numpy.float32) / 255.
            utils.write_hdf5(names[1], data)
            log('wrote %s' % names[1])
    for names in filenames[-2:]:
        with gzip.open(names[0], 'rb') as f:
            utils.write_hdf5(
                names[1],
                numpy.frombuffer(f.read(), numpy.uint8,
                                 offset=8).reshape(-1, 1).astype(numpy.int))
            log('wrote %s' % names[1])
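The offsets come from the IDX format of the raw MNIST files: image files begin with four big-endian int32 fields (magic, count, rows, cols), i.e. 16 bytes, and label files with two fields, i.e. 8 bytes. A quick header sanity check, reusing the paths helper from above:

import gzip
import struct

with gzip.open(paths.raw_mnist_train_images_file(), 'rb') as f:
    # Four big-endian int32 values = 16 bytes, hence offset=16 above.
    magic, count, rows, cols = struct.unpack('>IIII', f.read(16))
    assert (rows, cols) == (28, 28)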
Example #6
    def loop(self):
        """
        Main loop for training and testing, saving ...
        """

        while self.epoch < self.args.epochs:
            log('[Training] %s' % self.scheduler.report())

            # Note that we test first, to also get the error of the untrained model.
            testing = elapsed(functools.partial(self.test))
            training = elapsed(functools.partial(self.train))
            log('[Training] %gs training, %gs testing' % (training, testing))

            if self.args.early_stopping:
                validation = elapsed(functools.partial(self.validate))
                log('[Training] %gs validation' % validation)

            # Save model checkpoint after each epoch.
            utils.remove(self.args.state_file + '.%d' % (self.epoch - 1))
            State.checkpoint(self.model, self.scheduler.optimizer, self.epoch,
                             self.args.state_file + '.%d' % self.epoch)
            log('[Training] %d: checkpoint' % self.epoch)
            torch.cuda.empty_cache()  # necessary?

            # Save statistics and plots.
            if self.args.training_file:
                utils.write_hdf5(self.args.training_file,
                                 self.train_statistics)
                log('[Training] %d: wrote %s' %
                    (self.epoch, self.args.training_file))
            if self.args.testing_file:
                utils.write_hdf5(self.args.testing_file, self.test_statistics)
                log('[Training] %d: wrote %s' %
                    (self.epoch, self.args.testing_file))

            if utils.display():
                self.plot()
            self.epoch += 1  # !

        # Final testing.
        testing = elapsed(functools.partial(self.test))
        log('[Training] %gs testing' % (testing))

        # Save model checkpoint after each epoch.
        utils.remove(self.args.state_file + '.%d' % (self.epoch - 1))
        State.checkpoint(self.model, self.scheduler.optimizer, self.epoch,
                         self.args.state_file)
        log('[Training] %d: checkpoint' % self.epoch)

        self.results = {
            'training_statistics': self.train_statistics,
            'testing_statistics': self.test_statistics,
        }
        if self.args.results_file:
            utils.write_pickle(self.args.results_file, self.results)
            log('[Training] wrote %s' % self.args.results_file)
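The loop times each phase with elapsed(functools.partial(...)), which evidently runs the callable and returns wall-clock seconds. A sketch of the assumed helper:

import time

def elapsed(fn):
    # Sketch: execute fn and return the elapsed wall-clock time in seconds.
    start = time.time()
    fn()
    return time.time() - start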
Example #7
    def test(self):
        """
        Test classifier to identify valid samples to attack.
        """

        num_batches = int(
            math.ceil(self.perturbations.shape[0] / self.args.batch_size))

        for b in range(num_batches):
            b_start = b * self.args.batch_size
            b_end = min((b + 1) * self.args.batch_size,
                        self.perturbations.shape[0])
            batch_fonts = self.test_fonts[b_start:b_end]
            batch_classes = self.test_classes[b_start:b_end]
            batch_code = numpy.concatenate(
                (common.numpy.one_hot(batch_fonts, self.N_font),
                 common.numpy.one_hot(batch_classes, self.N_class)),
                axis=1).astype(numpy.float32)

            batch_inputs = common.torch.as_variable(
                self.perturbations[b_start:b_end], self.args.use_gpu)
            batch_code = common.torch.as_variable(batch_code,
                                                  self.args.use_gpu)

            # This allows optimizing only over theta, keeping the font/class code fixed.
            self.model.set_code(batch_code)
            output_images = self.model(batch_inputs)

            output_images = numpy.squeeze(
                numpy.transpose(output_images.cpu().detach().numpy(),
                                (0, 2, 3, 1)))
            self.perturbation_images = common.numpy.concatenate(
                self.perturbation_images, output_images)

            if b % 100 == 0:
                log('[Testing] computing perturbation images %d' % b)

        utils.makedir(os.path.dirname(self.args.perturbation_images_file))
        if len(self.perturbation_images.shape) > 3:
            self.perturbation_images = self.perturbation_images.reshape(
                self.N_samples, self.N_attempts,
                self.perturbation_images.shape[1],
                self.perturbation_images.shape[2],
                self.perturbation_images.shape[3])
        else:
            self.perturbation_images = self.perturbation_images.reshape(
                self.N_samples, self.N_attempts,
                self.perturbation_images.shape[1],
                self.perturbation_images.shape[2])
        self.perturbation_images = numpy.swapaxes(self.perturbation_images, 0,
                                                  1)
        utils.write_hdf5(self.args.perturbation_images_file,
                         self.perturbation_images)
        log('[Testing] wrote %s' % self.args.perturbation_images_file)
Example #8
def download():
    """
    Download and convert Cifar10.
    """

    trainset = torchvision.datasets.CIFAR10(root=paths.raw_cifar10_dir(),
                                            train=True,
                                            download=True)
    testset = torchvision.datasets.CIFAR10(root=paths.raw_cifar10_dir(),
                                           train=False,
                                           download=True)
    train_images = numpy.array(trainset.train_data)
    train_labels = numpy.array(trainset.train_labels)
    test_images = numpy.array(testset.test_data)
    test_labels = numpy.array(testset.test_labels)

    assert numpy.max(train_images) == 255

    train_images = train_images / 255.
    test_images = test_images / 255.

    utils.write_hdf5(paths.cifar10_train_images_file(),
                     train_images.astype(numpy.float32))
    log('wrote %s' % paths.cifar10_train_images_file())
    utils.write_hdf5(paths.cifar10_test_images_file(),
                     test_images.astype(numpy.float32))
    log('wrote %s' % paths.cifar10_test_images_file())
    utils.write_hdf5(paths.cifar10_train_labels_file(),
                     train_labels.reshape(-1, 1).astype(numpy.int))
    log('wrote %s' % paths.cifar10_train_labels_file())
    utils.write_hdf5(paths.cifar10_test_labels_file(),
                     test_labels.reshape(-1, 1).astype(numpy.int))
    log('wrote %s' % paths.cifar10_test_labels_file())
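Note that train_data/train_labels and test_data/test_labels only exist on older torchvision releases; torchvision 0.4 and later expose unified attributes instead:

# Equivalent accessors on newer torchvision (>= 0.4):
train_images = numpy.array(trainset.data)
train_labels = numpy.array(trainset.targets)
test_images = numpy.array(testset.data)
test_labels = numpy.array(testset.targets)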
Example #9
def convert_dataset():
    """
    Convert SVHN.
    """

    data = sio.loadmat(paths.raw_svhn_train_file())

    # access to the dict
    images = data['X']
    images = images.transpose(3, 0, 1, 2)
    images = images / 255.
    labels = data['y'] - 1
    #print(images[0], numpy.max(images), numpy.min(images))

    utils.write_hdf5(paths.svhn_train_images_file(),
                     images.astype(numpy.float32))
    log('wrote %s' % paths.svhn_train_images_file())
    utils.write_hdf5(paths.svhn_train_labels_file(),
                     labels.reshape(-1, 1).astype(numpy.int))
    log('wrote %s' % paths.svhn_train_labels_file())

    data = sio.loadmat(paths.raw_svhn_test_file())

    # access to the dict
    images = data['X']
    images = images.transpose(3, 0, 1, 2)
    images = images / 255.
    labels = data['y'] - 1

    utils.write_hdf5(paths.svhn_test_images_file(),
                     images.astype(numpy.float32))
    log('wrote %s' % paths.svhn_test_images_file())
    utils.write_hdf5(paths.svhn_test_labels_file(),
                     labels.reshape(-1, 1).astype(numpy.int))
    log('wrote %s' % paths.svhn_test_labels_file())
Example #10
    def test(self):
        """
        Test classifier to identify valid samples to attack.
        """

        num_batches = int(
            math.ceil(self.perturbations.shape[0] / self.args.batch_size))

        for b in range(num_batches):
            b_start = b * self.args.batch_size
            b_end = min((b + 1) * self.args.batch_size,
                        self.perturbations.shape[0])
            batch_images = common.torch.as_variable(
                self.test_images[b_start:b_end], self.args.use_gpu)
            batch_inputs = common.torch.as_variable(
                self.perturbations[b_start:b_end], self.args.use_gpu)

            self.model.set_image(batch_images)
            output_images = self.model(batch_inputs)

            output_images = numpy.squeeze(
                numpy.transpose(output_images.cpu().detach().numpy(),
                                (0, 2, 3, 1)))
            self.perturbation_images = common.numpy.concatenate(
                self.perturbation_images, output_images)

            if b % 100 == 0:
                log('[Testing] computing perturbation images %d' % b)

        utils.makedir(os.path.dirname(self.args.perturbation_images_file))
        if len(self.perturbation_images.shape) > 3:
            self.perturbation_images = self.perturbation_images.reshape(
                self.N_samples, self.N_attempts,
                self.perturbation_images.shape[1],
                self.perturbation_images.shape[2],
                self.perturbation_images.shape[3])
        else:
            self.perturbation_images = self.perturbation_images.reshape(
                self.N_samples, self.N_attempts,
                self.perturbation_images.shape[1],
                self.perturbation_images.shape[2])
        self.perturbation_images = numpy.swapaxes(self.perturbation_images, 0,
                                                  1)
        utils.write_hdf5(self.args.perturbation_images_file,
                         self.perturbation_images)
        log('[Testing] wrote %s' % self.args.perturbation_images_file)
Example #11
    def main(self):
        """
        Main.
        """

        with open(paths.raw_celeba_labels_file(), 'r') as f:
            lines = f.read().split('\n')
            lines = [line for line in lines if line]
            lines = lines[1:]

            attributes = [str(attribute) for attribute in lines[0].split(' ') if attribute]
            lines = lines[1:]

            labels = []
            for line in lines:
                values = [int(value) for value in line.split(' ')[1:] if value]
                assert len(values) == len(attributes)
                labels.append(values)

            labels = numpy.array(labels)
            labels[labels == -1] = 0

            def statistics(labels):
                """
                Label statistics.
                """

                for i in range(len(attributes)):
                    positive = numpy.sum(labels[:, i] == 1)
                    negative = numpy.sum(labels[:, i] == 0)
                    log('%d. attribute %s: %d %d' % (i, attributes[i], positive, negative))

            N = labels.shape[0]
            N_train = int(0.9*N)

            train_labels = labels[:N_train]
            test_labels = labels[N_train:]

            statistics(labels)
            statistics(train_labels)
            statistics(test_labels)

            utils.write_hdf5(paths.celeba_train_labels_file(), train_labels.reshape(-1, 1).astype(numpy.int))
            log('wrote %s' % paths.celeba_train_labels_file())
            utils.write_hdf5(paths.celeba_test_labels_file(), test_labels.reshape(-1, 1).astype(numpy.int))
            log('wrote %s' % paths.celeba_test_labels_file())
Example #12
    def sample(self):
        """
        Test the model.
        """

        assert self.decoder is not None

        self.decoder.eval()
        log('[Sampling] set decoder to eval')

        images = None

        theta = common.numpy.truncated_normal(
            (self.args.N_samples, self.args.latent_space_size),
            lower=-self.args.bound,
            upper=self.args.bound).astype(numpy.float32)
        num_batches = int(math.ceil(theta.shape[0] / self.args.batch_size))

        for b in range(num_batches):
            b_start = b * self.args.batch_size
            b_end = min((b + 1) * self.args.batch_size, theta.shape[0])

            batch_theta = common.torch.as_variable(theta[b_start:b_end],
                                                   self.args.use_gpu)

            # Important to get the correct codes!
            assert self.decoder.training is False
            output_images = self.decoder(batch_theta)

            output_images = numpy.squeeze(
                numpy.transpose(output_images.cpu().detach().numpy(),
                                (0, 2, 3, 1)))
            images = common.numpy.concatenate(images, output_images)

            if b % 100 == 50:
                log('[Sampling] %d' % b)

        if self.args.images_file:
            utils.write_hdf5(self.args.images_file, images)
            log('[Sampling] wrote %s' % self.args.images_file)

        if self.args.theta_file:
            utils.write_hdf5(self.args.theta_file, theta)
            log('[Sampling] wrote %s' % self.args.theta_file)
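common.numpy.truncated_normal is also called without explicit bounds in Example #24 below, so the helper must define defaults. A sketch via scipy.stats.truncnorm, with default bounds of +/-2 standard deviations as an assumption:

from scipy import stats

def truncated_normal(size, lower=-2, upper=2):
    # Sketch: standard normal samples truncated to [lower, upper]; the
    # default bounds are an assumption, only the signature is visible above.
    return stats.truncnorm.rvs(lower, upper, size=size)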
Example #13
    def test(self):
        """
        Test classifier to identify valid samples to attack.
        """

        num_batches = int(
            math.ceil(self.test_theta.shape[0] / self.args.batch_size))

        for b in range(num_batches):
            b_start = b * self.args.batch_size
            b_end = min((b + 1) * self.args.batch_size,
                        self.test_theta.shape[0])

            batch_classes = common.torch.as_variable(
                self.test_codes[b_start:b_end], self.args.use_gpu)
            batch_inputs = common.torch.as_variable(
                self.test_theta[b_start:b_end], self.args.use_gpu)

            if isinstance(self.model.decoder, models.SelectiveDecoder):
                self.model.decoder.set_code(batch_classes)

            output_classes = self.model(batch_inputs)
            values, indices = torch.max(torch.nn.functional.softmax(
                output_classes, dim=1),
                                        dim=1)
            errors = torch.abs(indices - batch_classes)

            self.accuracy = common.numpy.concatenate(self.accuracy,
                                                     errors.data.cpu().numpy())

            if b % 100 == 0:
                log('[Attack] computing accuracy %d' % b)

        self.accuracy = self.accuracy == 0
        utils.write_hdf5(self.args.accuracy_file, self.accuracy)
        log('[Attack] wrote %s' % self.args.accuracy_file)

        accuracy = numpy.sum(self.accuracy) / float(self.accuracy.shape[0])
        log('[Attack] accuracy %g' % accuracy)
        accuracy = numpy.sum(self.accuracy[:self.args.max_samples]) / float(
            self.args.max_samples)
        log('[Attack] accuracy on %d samples %g' %
            (self.args.max_samples, accuracy))
Example #14
    def test_interpolation(self):
        """
        Test interpolation.
        """

        interpolations = None
        perm = numpy.random.permutation(
            numpy.array(range(self.pred_codes.shape[0])))

        for i in range(50):
            first = self.pred_codes[i]
            second = self.pred_codes[perm[i]]
            linfit = scipy.interpolate.interp1d([0, 1],
                                                numpy.vstack([first, second]),
                                                axis=0)
            interpolations = common.numpy.concatenate(
                interpolations, linfit(numpy.linspace(0, 1, 10)))

        pred_images = None
        num_batches = int(
            math.ceil(interpolations.shape[0] / self.args.batch_size))
        interpolations = interpolations.astype(numpy.float32)

        for b in range(num_batches):
            b_start = b * self.args.batch_size
            # Batching is over the interpolated codes, not the test images.
            b_end = min((b + 1) * self.args.batch_size,
                        interpolations.shape[0])
            batch_codes = common.torch.as_variable(
                interpolations[b_start:b_end], self.args.use_gpu)

            # To get the correct images!
            output_images = self.decoder(batch_codes)

            output_images = numpy.squeeze(
                numpy.transpose(output_images.cpu().detach().numpy(),
                                (0, 2, 3, 1)))
            pred_images = common.numpy.concatenate(pred_images, output_images)

            if b % 100 == 50:
                log('[Testing] %d' % b)

        utils.write_hdf5(self.args.interpolation_file, pred_images)
        log('[Testing] wrote %s' % self.args.interpolation_file)
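Since linfit is fit on just the two endpoints 0 and 1, each interpolated row is plain linear blending between the two codes; the following is equivalent to linfit(numpy.linspace(0, 1, 10)):

t = numpy.linspace(0, 1, 10)[:, None]
steps = (1 - t) * first + t * second  # shape (10, latent_space_size)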
Example #15
def create_hdf5(img_h5_path,
                label_h5_path,
                img_label_path,
                sep=',',
                keys='tensor'):
    assert os.path.isfile(img_label_path)

    df = pd.read_csv(img_label_path, sep=sep, names=['name', 'label'])

    # Store image names and labels in numpy arrays
    img_paths = df['name'].to_numpy().astype(str)
    labels = df['label'].to_numpy().astype(np.int)

    # Check that labels are scalar integers
    assert len(labels.shape) == 1

    # Check that number of images and labels are equal
    assert img_paths.shape[0] == labels.shape[0]

    transform = transforms.ToTensor()

    images = np.empty((len(img_paths), ) + tuple(
        transform(Image.open(img_paths[0]).convert('RGB')).permute(1, 2,
                                                                   0).shape),
                      dtype=np.float32)

    for idx, img_path in enumerate(img_paths):
        img = transform(Image.open(img_path).convert('RGB'))

        # Change from C x H x W format to H x W x C format as expected by test.attack
        assert len(img.shape) == 3
        img = img.permute(1, 2, 0)
        images[idx] = img

    print("Writing image hdf5...")
    utils.write_hdf5(img_h5_path, images, keys)

    print("Writing label hdf5...")
    utils.write_hdf5(label_h5_path, labels, keys)
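A hypothetical invocation, assuming an index CSV with one '<image path>,<integer label>' pair per line (the file names here are illustrative only):

create_hdf5('images.h5', 'labels.h5', 'index.csv', sep=',', keys='tensor')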
Example #16
    def main(self):
        """
        Main.
        """

        filepaths = utils.read_ordered_directory(paths.raw_celeba_images_dir())
        log('reading %s' % paths.raw_celeba_images_dir())

        images = []
        for filepath in filepaths:
            log('processing %s' % os.path.basename(filepath))
            image = imageio.imread(filepath)
            width = 54
            height = int(width*image.shape[0]/float(image.shape[1]))
            image = skimage.transform.resize(image, (height, width))
            image = image[5:image.shape[0] - 5, 3:image.shape[1]-3, :]
            # Note that images are already scaled to [0, 1] here!
            #image = image/255.
            #print(numpy.min(image), numpy.max(image))
            assert numpy.min(image) >= 0 and numpy.max(image) <= 1
            images.append(image)

            #print(image.shape)
            #pyplot.imshow(image)
            #pyplot.show()

        images = numpy.array(images)
        log('%g %g' % (numpy.min(images), numpy.max(images)))
        N = images.shape[0]
        N_train = int(0.9 * N)

        train_images = images[:N_train]
        test_images = images[N_train:]

        utils.write_hdf5(paths.celeba_train_images_file(), train_images.astype(numpy.float32))
        log('wrote %s' % paths.celeba_train_images_file())
        utils.write_hdf5(paths.celeba_test_images_file(), test_images.astype(numpy.float32))
        log('wrote %s' % paths.celeba_test_images_file())
Example #17
    def test(self):
        """
        Test classifier to identify valid samples to attack.
        """

        num_batches = int(math.ceil(self.test_theta.shape[0] / self.args.batch_size))

        for b in range(num_batches):
            b_start = b * self.args.batch_size
            b_end = min((b + 1) * self.args.batch_size, self.test_theta.shape[0])
            batch_fonts = self.test_fonts[b_start: b_end]
            batch_classes = self.test_classes[b_start: b_end]
            batch_code = numpy.concatenate((common.numpy.one_hot(batch_fonts, self.N_font), common.numpy.one_hot(batch_classes, self.N_class)), axis=1).astype(numpy.float32)

            batch_classes = common.torch.as_variable(batch_classes, self.args.use_gpu)
            batch_inputs = common.torch.as_variable(self.test_theta[b_start: b_end], self.args.use_gpu)
            batch_code = common.torch.as_variable(batch_code, self.args.use_gpu)

            # This allows optimizing only over theta, keeping the font/class code fixed.
            self.model.decoder.set_code(batch_code)

            output_classes = self.model(batch_inputs)
            values, indices = torch.max(torch.nn.functional.softmax(output_classes, dim=1), dim=1)
            errors = torch.abs(indices - batch_classes)

            self.accuracy = common.numpy.concatenate(self.accuracy, errors.data.cpu().numpy())

            if b % 100 == 0:
                log('[Attack] computing accuracy %d' % b)

        self.accuracy = self.accuracy == 0
        utils.write_hdf5(self.args.accuracy_file, self.accuracy)
        log('[Attack] wrote %s' % self.args.accuracy_file)

        accuracy = numpy.sum(self.accuracy) / float(self.accuracy.shape[0])
        log('[Attack] accuracy %g' % accuracy)
        accuracy = numpy.sum(self.accuracy[:self.args.max_samples]) / float(self.args.max_samples)
        log('[Attack] accuracy on %d samples %g' % (self.args.max_samples, accuracy))
Example #18
    def main(self):
        """
        Main method.
        """

        codes = utils.read_hdf5(self.args.codes_file)
        log('[Data] read %s' % self.args.codes_file)

        theta = utils.read_hdf5(self.args.theta_file)
        log('[Data] read %s' % self.args.theta_file)

        images = utils.read_hdf5(self.args.images_file)
        log('[Data] read %s' % self.args.images_file)

        #
        # The set is not split randomly.
        # This simplifies training set subselection while enforcing balanced datasets.
        # For example, with 10 classes, every subset whose size is a multiple of 10
        # will be balanced by construction.
        #

        N = codes.shape[0]
        N_train = self.args.N_train

        train_codes = codes[:N_train]
        test_codes = codes[N_train:]

        train_theta = theta[:N_train]
        test_theta = theta[N_train:]

        train_images = images[:N_train]
        test_images = images[N_train:]

        utils.write_hdf5(self.args.train_codes_file, train_codes)
        log('[Data] wrote %s' % self.args.train_codes_file)
        utils.write_hdf5(self.args.test_codes_file, test_codes)
        log('[Data] wrote %s' % self.args.test_codes_file)

        utils.write_hdf5(self.args.train_theta_file, train_theta)
        log('[Data] wrote %s' % self.args.train_theta_file)
        utils.write_hdf5(self.args.test_theta_file, test_theta)
        log('[Data] wrote %s' % self.args.test_theta_file)

        utils.write_hdf5(self.args.train_images_file, train_images)
        log('[Data] wrote %s' % self.args.train_images_file)
        utils.write_hdf5(self.args.test_images_file, test_images)
        log('[Data] wrote %s' % self.args.test_images_file)
Example #19
    def test(self):
        """
        Test the model.
        """

        assert self.model is not None
        assert self.model.training is False
        assert self.test_images.shape[0] == self.test_codes.shape[
            0], 'number of samples have to match'

        self.loss = 0.
        self.error = 0.
        num_batches = int(
            math.ceil(self.test_images.shape[0] / self.args.batch_size))

        for b in range(num_batches):
            b_start = b * self.args.batch_size
            b_end = min((b + 1) * self.args.batch_size,
                        self.test_images.shape[0])
            batch_images = common.torch.as_variable(
                self.test_images[b_start:b_end], self.args.use_gpu)
            batch_classes = common.torch.as_variable(
                self.test_codes[b_start:b_end], self.args.use_gpu)
            batch_images = batch_images.permute(0, 3, 1, 2)

            output_classes = self.model(batch_images)
            e = torch.nn.functional.cross_entropy(output_classes,
                                                  batch_classes,
                                                  size_average=True)
            self.loss += e.item()

            values, indices = torch.max(torch.nn.functional.softmax(
                output_classes, dim=1),
                                        dim=1)
            errors = torch.abs(indices - batch_classes)
            e = torch.sum(errors > 0).float() / batch_classes.size()[0]
            self.error += e.item()

            self.accuracy = common.numpy.concatenate(self.accuracy,
                                                     errors.data.cpu().numpy())

        self.loss /= num_batches
        self.error /= num_batches
        log('[Testing] test loss %g; test error %g' % (self.loss, self.error))

        self.accuracy = self.accuracy == 0
        if self.args.accuracy_file:
            utils.write_hdf5(self.args.accuracy_file, self.accuracy)
            log('[Testing] wrote %s' % self.args.accuracy_file)

        accuracy = numpy.sum(self.accuracy) / self.accuracy.shape[0]
        if numpy.abs(1 - accuracy - self.error) < 1e-4:
            log('[Testing] accuracy file is consistent, accuracy %g' %
                accuracy)

        self.results = {
            'loss': self.loss,
            'error': self.error,
        }
        if self.args.results_file:
            utils.write_pickle(self.args.results_file, self.results)
            log('[Testing] wrote %s' % self.args.results_file)
Example #20
    def test(self):
        """
        Test classifier to identify valid samples to attack.
        """

        self.model.eval()
        assert self.model.training is False
        assert self.perturbation_codes.shape[0] == self.perturbations.shape[0]
        assert self.test_codes.shape[0] == self.test_images.shape[0]
        assert len(self.perturbations.shape) == 4
        assert len(self.test_images.shape) == 4

        perturbations_accuracy = None
        num_batches = int(math.ceil(self.perturbations.shape[0] / self.args.batch_size))

        for b in range(num_batches):
            b_start = b * self.args.batch_size
            b_end = min((b + 1) * self.args.batch_size, self.perturbations.shape[0])
            batch_perturbations = common.torch.as_variable(self.perturbations[b_start: b_end], self.args.use_gpu)
            batch_classes = common.torch.as_variable(self.perturbation_codes[b_start: b_end], self.args.use_gpu)
            batch_perturbations = batch_perturbations.permute(0, 3, 1, 2)

            output_classes = self.model(batch_perturbations)
            values, indices = torch.max(torch.nn.functional.softmax(output_classes, dim=1), dim=1)
            errors = torch.abs(indices - batch_classes)
            perturbations_accuracy = common.numpy.concatenate(perturbations_accuracy, errors.data.cpu().numpy())

            for n in range(batch_perturbations.size(0)):
                log('[Testing] %d: original success=%d, transfer error=%d' % (n, self.original_success[b_start + n], errors[n].item()))

        self.transfer_success[perturbations_accuracy == 0] = -1
        self.transfer_success = self.transfer_success.reshape((self.N_samples, self.N_attempts))
        self.transfer_success = numpy.swapaxes(self.transfer_success, 0, 1)

        utils.makedir(os.path.dirname(self.args.transfer_success_file))
        utils.write_hdf5(self.args.transfer_success_file, self.transfer_success)
        log('[Testing] wrote %s' % self.args.transfer_success_file)

        num_batches = int(math.ceil(self.test_images.shape[0] / self.args.batch_size))
        for b in range(num_batches):
            b_start = b * self.args.batch_size
            b_end = min((b + 1) * self.args.batch_size, self.test_images.shape[0])
            batch_images = common.torch.as_variable(self.test_images[b_start: b_end], self.args.use_gpu)
            batch_classes = common.torch.as_variable(self.test_codes[b_start: b_end], self.args.use_gpu)
            batch_images = batch_images.permute(0, 3, 1, 2)

            output_classes = self.model(batch_images)
            values, indices = torch.max(torch.nn.functional.softmax(output_classes, dim=1), dim=1)
            errors = torch.abs(indices - batch_classes)

            self.transfer_accuracy = common.numpy.concatenate(self.transfer_accuracy, errors.data.cpu().numpy())

            if b % 100 == 0:
                log('[Testing] computing accuracy %d' % b)

        self.transfer_accuracy = self.transfer_accuracy == 0
        log('[Testing] original accuracy=%g' % (numpy.sum(self.original_accuracy)/float(self.original_accuracy.shape[0])))
        log('[Testing] transfer accuracy=%g' % (numpy.sum(self.transfer_accuracy)/float(self.transfer_accuracy.shape[0])))
        log('[Testing] accuracy difference=%g' % (numpy.sum(self.transfer_accuracy != self.original_accuracy)/float(self.transfer_accuracy.shape[0])))
        log('[Testing] accuracy difference on %d samples=%g' % (self.N_samples, numpy.sum(self.transfer_accuracy[:self.N_samples] != self.original_accuracy[:self.N_samples])/float(self.N_samples)))
        self.transfer_accuracy = numpy.logical_and(self.transfer_accuracy, self.original_accuracy)

        utils.makedir(os.path.dirname(self.args.transfer_accuracy_file))
        utils.write_hdf5(self.args.transfer_accuracy_file, self.transfer_accuracy)
        log('[Testing] wrote %s' % self.args.transfer_accuracy_file)
Example #21
    def attack(self):
        """
        Test the model.
        """

        assert self.model is not None
        assert self.model.classifier.training is False

        concatenate_axis = -1
        if os.path.exists(self.args.perturbations_file) and os.path.exists(
                self.args.success_file):
            self.original_perturbations = utils.read_hdf5(
                self.args.perturbations_file)
            assert len(self.original_perturbations.shape) == 3
            log('[Attack] read %s' % self.args.perturbations_file)

            self.original_success = utils.read_hdf5(self.args.success_file)
            log('[Attack] read %s' % self.args.success_file)

            assert self.original_perturbations.shape[
                0] == self.original_success.shape[0]
            assert self.original_perturbations.shape[
                1] == self.original_success.shape[1]

            if self.original_perturbations.shape[
                    1] >= self.args.max_samples and self.original_perturbations.shape[
                        0] >= self.args.max_attempts:
                log('[Attack] found %d attempts, %d samples, requested no more'
                    % (self.original_perturbations.shape[0],
                       self.original_perturbations.shape[1]))
                return
            elif self.original_perturbations.shape[
                    0] == self.args.max_attempts or self.original_perturbations.shape[
                        1] == self.args.max_samples:
                if self.original_perturbations.shape[
                        0] == self.args.max_attempts:
                    self.test_images = self.test_images[
                        self.original_perturbations.shape[1]:]
                    self.test_codes = self.test_codes[
                        self.original_perturbations.shape[1]:]
                    self.args.max_samples = self.args.max_samples - self.original_perturbations.shape[
                        1]
                    concatenate_axis = 1
                    log('[Attack] found %d attempts with %d perturbations, computing %d more perturbations'
                        % (self.original_perturbations.shape[0],
                           self.original_perturbations.shape[1],
                           self.args.max_samples))
                elif self.original_perturbations.shape[
                        1] == self.args.max_samples:
                    self.args.max_attempts = self.args.max_attempts - self.original_perturbations.shape[
                        0]
                    concatenate_axis = 0
                    log('[Attack] found %d attempts with %d perturbations, computing %d more attempts'
                        % (self.original_perturbations.shape[0],
                           self.original_perturbations.shape[1],
                           self.args.max_attempts))

        self.perturbations = numpy.zeros(
            (self.args.max_attempts, self.args.max_samples, self.args.N_theta))
        self.success = numpy.ones(
            (self.args.max_attempts, self.args.max_samples), dtype=int) * -1

        if self.args.attack.find('Batch') >= 0:
            batch_size = min(self.args.batch_size, self.args.max_samples)
        else:
            batch_size = 1

        objective = self.objective_class()
        num_batches = int(math.ceil(self.args.max_samples / batch_size))

        for i in range(num_batches):
            if i * batch_size == self.args.max_samples:
                break

            i_start = i * batch_size
            i_end = min((i + 1) * batch_size, self.args.max_samples)

            batch_classes = common.torch.as_variable(
                self.test_codes[i_start:i_end], self.args.use_gpu)
            batch_theta = common.torch.as_variable(
                numpy.zeros((i_end - i_start, self.args.N_theta),
                            dtype=numpy.float32), self.args.use_gpu)
            if self.args.N_theta > 4:
                batch_theta[:, 4] = 1
            batch_images = common.torch.as_variable(
                self.test_images[i_start:i_end], self.args.use_gpu)
            batch_images = batch_images.permute(0, 3, 1, 2)

            self.model.decoder.set_image(batch_images)
            #output_images = self.model.decoder.forward(batch_theta)
            #error = torch.sum(torch.abs(output_images - batch_images))
            #error = error.item()
            #print(error)
            #from matplotlib import pyplot
            #output_images = numpy.squeeze(numpy.transpose(output_images.cpu().detach().numpy(), (0, 2, 3, 1)))
            #pyplot.imshow(output_images[0])
            #pyplot.show()

            t = 0
            while t < self.args.max_attempts:
                attack = self.setup_attack(batch_theta, batch_classes)
                success, perturbations, probabilities, norm, _ = attack.run(
                    objective)
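                # perturbations != perturbations is a NaN check: the
                # comparison only holds for NaN entries.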
                assert not numpy.any(
                    perturbations != perturbations), perturbations

                # Note that we save the perturbed image, not only the perturbation!
                perturbations = perturbations.reshape(batch_theta.size(
                ))  # hack for when only one dimensional latent space is used!
                self.perturbations[t][
                    i_start:i_end] = perturbations + batch_theta.cpu().detach(
                    ).numpy()
                self.success[t][i_start:i_end] = success
                t += 1

            log('[Attack] %d: completed' % i)

        if concatenate_axis >= 0:
            if self.perturbations.shape[0] == self.args.max_attempts:
                self.perturbations = numpy.concatenate(
                    (self.original_perturbations, self.perturbations),
                    axis=concatenate_axis)
                self.success = numpy.concatenate(
                    (self.original_success, self.success),
                    axis=concatenate_axis)
                log('[Attack] concatenated')

        utils.write_hdf5(self.args.perturbations_file, self.perturbations)
        log('[Attack] wrote %s' % self.args.perturbations_file)
        utils.write_hdf5(self.args.success_file, self.success)
        log('[Attack] wrote %s' % self.args.success_file)
Example #22
    def test_test(self):
        """
        Test on testing set.
        """

        num_batches = int(
            math.ceil(self.test_images.shape[0] / self.args.batch_size))

        for b in range(num_batches):
            b_start = b * self.args.batch_size
            b_end = min((b + 1) * self.args.batch_size,
                        self.test_images.shape[0])

            batch_images = common.torch.as_variable(
                self.test_images[b_start:b_end], self.args.use_gpu)
            batch_images = batch_images.permute(0, 3, 1, 2)

            # Important to get the correct codes!
            output_codes, output_logvar = self.encoder(batch_images)
            output_images = self.decoder(output_codes)
            e = self.reconstruction_loss(batch_images, output_images)
            self.reconstruction_error += e.data

            self.code_mean += torch.mean(output_codes).item()
            self.code_var += torch.var(output_codes).item()

            output_images = numpy.squeeze(
                numpy.transpose(output_images.cpu().detach().numpy(),
                                (0, 2, 3, 1)))
            self.pred_images = common.numpy.concatenate(
                self.pred_images, output_images)

            output_codes = output_codes.cpu().detach().numpy()
            self.pred_codes = common.numpy.concatenate(self.pred_codes,
                                                       output_codes)

            if b % 100 == 50:
                log('[Testing] %d' % b)

        assert self.pred_images.shape[0] == self.test_images.shape[
            0], 'computed invalid number of test images'
        if self.args.reconstruction_file:
            utils.write_hdf5(self.args.reconstruction_file, self.pred_images)
            log('[Testing] wrote %s' % self.args.reconstruction_file)

        if self.args.test_theta_file:
            assert self.pred_codes.shape[0] == self.test_images.shape[
                0], 'computed invalid number of test codes'
            utils.write_hdf5(self.args.test_theta_file, self.pred_codes)
            log('[Testing] wrote %s' % self.args.test_theta_file)

        threshold = 0.9
        percentage = 0
        # values = numpy.linalg.norm(pred_codes, ord=2, axis=1)
        values = numpy.max(numpy.abs(self.pred_codes), axis=1)

        while percentage < 0.9:
            threshold += 0.1
            percentage = numpy.sum(values <= threshold) / float(
                values.shape[0])
            log('[Testing] threshold %g percentage %g' %
                (threshold, percentage))
        log('[Testing] taking threshold %g with percentage %g' %
            (threshold, percentage))

        if self.args.output_directory and utils.display():
            # fit = 10
            # plot_file = os.path.join(self.args.output_directory, 'test_codes')
            # plot.manifold(plot_file, pred_codes[::fit], None, None, 'tsne', None, title='t-SNE of Test Codes')
            # log('[Testing] wrote %s' % plot_file)

            for d in range(1, self.pred_codes.shape[1]):
                plot_file = os.path.join(self.args.output_directory,
                                         'test_codes_%s' % d)
                plot.scatter(
                    plot_file,
                    self.pred_codes[:, 0],
                    self.pred_codes[:, d], (values <= threshold).astype(int),
                    ['greater %g' % threshold,
                     'smaller %g' % threshold],
                    title='Dimensions 0 and %d of Test Codes' % d)
                log('[Testing] wrote %s' % plot_file)

        self.reconstruction_error /= num_batches
        log('[Testing] reconstruction error %g' % self.reconstruction_error)
Example #23
    def loop(self):
        """
        Main loop for training and testing, saving ...
        """

        auto_encoder_params = {
            'lr': self.args.base_lr,
            'lr_decay': self.args.base_lr_decay,
            'lr_min': 0.000000001,
            'weight_decay': self.args.weight_decay
        }

        classifier_params = {
            'lr': self.args.base_lr,
            'lr_decay': self.args.base_lr_decay,
            'lr_min': 0.000000001,
            'weight_decay': self.args.weight_decay
        }

        e = 0
        if os.path.exists(self.args.encoder_file) and os.path.exists(
                self.args.decoder_file) and os.path.exists(
                    self.args.classifier_file):
            state = State.load(self.args.encoder_file)
            log('[Training] loaded %s' % self.args.encoder_file)
            self.encoder.load_state_dict(state.model)
            log('[Training] loaded encoder')

            if self.args.use_gpu and not cuda.is_cuda(self.encoder):
                self.encoder = self.encoder.cuda()

            optimizer = torch.optim.Adam(list(self.encoder.parameters()),
                                         auto_encoder_params['lr'])
            optimizer.load_state_dict(state.optimizer)
            self.encoder_scheduler = ADAMScheduler(optimizer,
                                                   **auto_encoder_params)

            state = State.load(self.args.decoder_file)
            log('[Training] loaded %s' % self.args.decoder_file)
            self.decoder.load_state_dict(state.model)
            log('[Training] loaded decoder')

            if self.args.use_gpu and not cuda.is_cuda(self.decoder):
                self.decoder = self.decoder.cuda()

            optimizer = torch.optim.Adam(list(self.decoder.parameters()),
                                         auto_encoder_params['lr'])
            optimizer.load_state_dict(state.optimizer)
            self.decoder_scheduler = ADAMScheduler(optimizer,
                                                   **auto_encoder_params)

            state = State.load(self.args.classifier_file)
            log('[Training] loaded %s' % self.args.classifier_file)
            self.classifier.load_state_dict(state.model)
            log('[Training] loaded classifier')

            if self.args.use_gpu and not cuda.is_cuda(self.classifier):
                self.classifier = self.classifier.cuda()

            optimizer = torch.optim.Adam(list(self.classifier.parameters()),
                                         classifier_params['lr'])
            optimizer.load_state_dict(state.optimizer)
            self.classifier_scheduler = ADAMScheduler(optimizer,
                                                      **classifier_params)

            e = state.epoch + 1
            self.encoder_scheduler.update(e)
            self.decoder_scheduler.update(e)
            self.classifier_scheduler.update(e)
        else:
            if self.args.use_gpu and not cuda.is_cuda(self.encoder):
                self.encoder = self.encoder.cuda()
            if self.args.use_gpu and not cuda.is_cuda(self.decoder):
                self.decoder = self.decoder.cuda()
            if self.args.use_gpu and not cuda.is_cuda(self.classifier):
                self.classifier = self.classifier.cuda()

            self.encoder_scheduler = ADAMScheduler(
                list(self.encoder.parameters()), **auto_encoder_params)
            self.encoder_scheduler.initialize()  # !

            self.decoder_scheduler = ADAMScheduler(
                list(self.decoder.parameters()), **auto_encoder_params)
            self.decoder_scheduler.initialize()  # !

            self.classifier_scheduler = ADAMScheduler(
                list(self.classifier.parameters()), **classifier_params)
            self.classifier_scheduler.initialize()  # !

        log('[Training] model needs %gMiB' %
            (cuda.estimate_size(self.encoder) / (1024 * 1024)))

        while e < self.args.epochs:
            log('[Training] %s' % self.encoder_scheduler.report())
            log('[Training] %s' % self.decoder_scheduler.report())
            log('[Training] %s' % self.classifier_scheduler.report())

            testing = elapsed(functools.partial(self.test, e))
            training = elapsed(functools.partial(self.train, e))
            log('[Training] %gs training, %gs testing' % (training, testing))

            #utils.remove(self.args.encoder_file + '.%d' % (e - 1))
            #utils.remove(self.args.decoder_file + '.%d' % (e - 1))
            #utils.remove(self.args.classifier_file + '.%d' % (e - 1))
            State.checkpoint(self.encoder, self.encoder_scheduler.optimizer, e,
                             self.args.encoder_file + '.%d' % e)
            State.checkpoint(self.decoder, self.decoder_scheduler.optimizer, e,
                             self.args.decoder_file + '.%d' % e)
            State.checkpoint(self.classifier,
                             self.classifier_scheduler.optimizer, e,
                             self.args.classifier_file + '.%d' % e)

            log('[Training] %d: checkpoint' % e)
            torch.cuda.empty_cache()  # necessary?

            # Save statistics and plots.
            if self.args.training_file:
                utils.write_hdf5(self.args.training_file,
                                 self.train_statistics)
                log('[Training] %d: wrote %s' % (e, self.args.training_file))
            if self.args.testing_file:
                utils.write_hdf5(self.args.testing_file, self.test_statistics)
                log('[Training] %d: wrote %s' % (e, self.args.testing_file))

            #if utils.display():
            #    self.plot()

            e += 1  # !

        testing = elapsed(functools.partial(self.test, e))
        log('[Training] %gs testing' % (testing))

        #utils.remove(self.args.encoder_file + '.%d' % (e - 1))
        #utils.remove(self.args.decoder_file + '.%d' % (e - 1))
        #utils.remove(self.args.classifier_file + '.%d' % (e - 1))
        State.checkpoint(self.encoder, self.encoder_scheduler.optimizer, e,
                         self.args.encoder_file)
        State.checkpoint(self.decoder, self.decoder_scheduler.optimizer, e,
                         self.args.decoder_file)
        State.checkpoint(self.classifier, self.classifier_scheduler.optimizer,
                         e, self.args.classifier_file)

        self.results = {
            'training_statistics': self.train_statistics,
            'testing_statistics': self.test_statistics,
        }
        if self.args.results_file:
            utils.write_pickle(self.args.results_file, self.results)
            log('[Training] wrote %s' % self.args.results_file)
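The cuda.is_cuda checks above presumably test whether a module's parameters already live on the GPU; a sketch of such a helper:

def is_cuda(module):
    # Sketch: a module counts as CUDA if its first parameter is on the GPU.
    return next(module.parameters()).is_cuda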
Example #24
    def test(self, epoch):
        """
        Test the model.

        :param epoch: current epoch
        :type epoch: int
        """

        self.encoder.eval()
        log('[Training] %d set encoder to eval' % epoch)
        self.decoder.eval()
        log('[Training] %d set decoder to eval' % epoch)
        self.classifier.eval()
        log('[Training] %d set classifier to eval' % epoch)

        latent_loss = 0
        reconstruction_loss = 0
        reconstruction_error = 0
        decoder_loss = 0
        discriminator_loss = 0
        mean = 0
        var = 0
        logvar = 0
        pred_images = None
        pred_codes = None

        num_batches = int(
            math.ceil(self.test_images.shape[0] / self.args.batch_size))
        assert self.encoder.training is False

        for b in range(num_batches):
            b_start = b * self.args.batch_size
            b_end = min((b + 1) * self.args.batch_size,
                        self.test_images.shape[0])
            batch_images = common.torch.as_variable(
                self.test_images[b_start:b_end], self.args.use_gpu)
            batch_images = batch_images.permute(0, 3, 1, 2)

            output_mu, output_logvar = self.encoder(batch_images)
            output_images = self.decoder(output_mu)

            output_real_classes = self.classifier(batch_images)
            output_reconstructed_classes = self.classifier(output_images)

            # Latent loss.
            e = self.latent_loss(output_mu, output_logvar)
            latent_loss += e.item()

            # Reconstruction loss.
            e = self.reconstruction_loss(batch_images, output_images)
            reconstruction_loss += e.item()

            # Reconstruction error.
            e = self.reconstruction_error(batch_images, output_images)
            reconstruction_error += e.item()

            e = self.decoder_loss(output_reconstructed_classes)
            decoder_loss += e.item()

            # Adversarial loss.
            e = self.discriminator_loss(output_real_classes,
                                        output_reconstructed_classes)
            discriminator_loss += e.item()

            mean += torch.mean(output_mu).item()
            var += torch.var(output_mu).item()
            logvar += torch.mean(output_logvar).item()

            output_images = numpy.squeeze(
                numpy.transpose(output_images.cpu().detach().numpy(),
                                (0, 2, 3, 1)))
            pred_images = common.numpy.concatenate(pred_images, output_images)
            output_codes = output_mu.cpu().detach().numpy()
            pred_codes = common.numpy.concatenate(pred_codes, output_codes)

        utils.write_hdf5(self.args.reconstruction_file, pred_images)
        log('[Training] %d: wrote %s' % (epoch, self.args.reconstruction_file))

        if utils.display():
            png_file = self.args.reconstruction_file + '.%d.png' % epoch
            if epoch == 0:
                vis.mosaic(png_file, self.test_images[:225], 15, 5, 'gray', 0,
                           1)
            else:
                vis.mosaic(png_file, pred_images[:225], 15, 5, 'gray', 0, 1)
            log('[Training] %d: wrote %s' % (epoch, png_file))

        latent_loss /= num_batches
        reconstruction_loss /= num_batches
        reconstruction_error /= num_batches
        decoder_loss /= num_batches
        discriminator_loss /= num_batches
        mean /= num_batches
        var /= num_batches
        logvar /= num_batches
        log('[Training] %d: test %g (%g) %g (%g, %g, %g)' %
            (epoch, reconstruction_loss, reconstruction_error, latent_loss,
             mean, var, logvar))
        log('[Training] %d: test %g %g' %
            (epoch, decoder_loss, discriminator_loss))

        num_batches = int(
            math.ceil(self.train_images.shape[0] / self.args.batch_size))
        iteration = epoch * num_batches
        self.test_statistics = numpy.vstack(
            (self.test_statistics,
             numpy.array([
                 iteration, iteration * self.args.batch_size,
                 min(num_batches, iteration),
                 min(num_batches, iteration) * self.args.batch_size,
                 reconstruction_loss, reconstruction_error, latent_loss, mean,
                 var, logvar, decoder_loss, discriminator_loss
             ])))

        pred_images = None
        if self.random_codes is None:
            self.random_codes = common.numpy.truncated_normal(
                (1000, self.args.latent_space_size)).astype(numpy.float32)
        num_batches = int(
            math.ceil(self.random_codes.shape[0] / self.args.batch_size))

        for b in range(num_batches):
            b_start = b * self.args.batch_size
            b_end = min((b + 1) * self.args.batch_size,
                        self.random_codes.shape[0])
            if b_start >= b_end: break

            batch_codes = common.torch.as_variable(
                self.random_codes[b_start:b_end], self.args.use_gpu)
            output_images = self.decoder(batch_codes)

            output_images = numpy.squeeze(
                numpy.transpose(output_images.cpu().detach().numpy(),
                                (0, 2, 3, 1)))
            pred_images = common.numpy.concatenate(pred_images, output_images)

        utils.write_hdf5(self.args.random_file, pred_images)
        log('[Training] %d: wrote %s' % (epoch, self.args.random_file))

        if utils.display() and epoch > 0:
            png_file = self.args.random_file + '.%d.png' % epoch
            vis.mosaic(png_file, pred_images[:225], 15, 5, 'gray', 0, 1)
            log('[Training] %d: wrote %s' % (epoch, png_file))

        interpolations = None
        perm = numpy.random.permutation(pred_codes.shape[0])

        # Interpolate linearly between 50 random pairs of test codes.
        for i in range(50):
            first = pred_codes[i]
            second = pred_codes[perm[i]]
            linfit = scipy.interpolate.interp1d([0, 1],
                                                numpy.vstack([first, second]),
                                                axis=0)
            interpolations = common.numpy.concatenate(
                interpolations, linfit(numpy.linspace(0, 1, 10)))

        pred_images = None
        num_batches = int(
            math.ceil(interpolations.shape[0] / self.args.batch_size))
        interpolations = interpolations.astype(numpy.float32)

        for b in range(num_batches):
            b_start = b * self.args.batch_size
            b_end = min((b + 1) * self.args.batch_size,
                        interpolations.shape[0])
            if b_start >= b_end: break

            batch_codes = common.torch.as_variable(
                interpolations[b_start:b_end], self.args.use_gpu)
            output_images = self.decoder(batch_codes)
            output_images = numpy.squeeze(
                numpy.transpose(output_images.cpu().detach().numpy(),
                                (0, 2, 3, 1)))
            pred_images = common.numpy.concatenate(pred_images, output_images)

            if b % 100 == 50:
                log('[Training] %d: interpolation %d' % (epoch, b))

        utils.write_hdf5(self.args.interpolation_file, pred_images)
        log('[Training] %d: wrote %s' % (epoch, self.args.interpolation_file))

        if utils.display() and epoch > 0:
            png_file = self.args.interpolation_file + '.%d.png' % epoch
            vis.mosaic(png_file, pred_images[:100], 10, 5, 'gray', 0, 1)
            log('[Training] %d: wrote %s' % (epoch, png_file))

    def attack(self):
        """
        Attack the model on random input images.
        """

        assert self.model is not None
        assert self.model.training is False

        if self.args.attack.find('Batch') >= 0:
            batch_size = min(self.args.batch_size, self.args.max_samples)
        else:
            batch_size = 1

        objective = self.objective_class()
        num_batches = int(math.ceil(self.args.max_samples / batch_size))

        # Color images keep their channel dimension, so we cannot squeeze here!
        if self.test_images.shape[3] > 1:
            self.perturbations = numpy.zeros(
                (self.args.max_attempts, self.args.max_samples,
                 self.test_images.shape[1], self.test_images.shape[2],
                 self.test_images.shape[3]))
        else:
            self.perturbations = numpy.zeros(
                (self.args.max_attempts, self.args.max_samples,
                 self.test_images.shape[1], self.test_images.shape[2]))
        self.success = numpy.ones(
            (self.args.max_attempts, self.args.max_samples), dtype=int) * -1
        self.probabilities = numpy.zeros(
            (self.args.max_attempts, self.args.max_samples, self.N_class))

        for i in range(num_batches):
            if i * batch_size == self.args.max_samples:
                break

            i_start = i * batch_size
            i_end = min((i + 1) * batch_size, self.args.max_samples)

            # Sample random images; shape is a tuple, so it must be converted
            # to a list before concatenation.
            batch_images = numpy.random.randint(
                0, 255,
                size=[i_end - i_start] + list(self.test_images.shape[1:]))
            batch_images = common.torch.as_variable(batch_images,
                                                    self.args.use_gpu)
            batch_images = batch_images.permute(0, 3, 1, 2)

            batch_classes = common.torch.as_variable(
                # randint's upper bound is exclusive; N_class covers all classes.
                numpy.random.randint(0,
                                     self.N_class,
                                     size=(batch_images.size(0))),
                self.args.use_gpu)

            t = 0
            while t < self.args.max_attempts:
                attack = self.setup_attack(batch_images, batch_classes)
                success, perturbations, probabilities, norm, _ = attack.run(
                    objective)
                # NaN check: a value differs from itself only if it is NaN.
                assert not numpy.any(
                    perturbations != perturbations), perturbations

                # Note that we save the perturbed image, not only the perturbation!
                self.perturbations[t][i_start:i_end] = numpy.squeeze(
                    numpy.transpose(perturbations + batch_images.cpu().numpy(),
                                    (0, 2, 3, 1)))
                self.success[t][i_start:i_end] = success
                self.probabilities[t][i_start:i_end] = probabilities
                # IMPORTANT: The adversarial examples do not take into account
                # whether the classifier classified the sample correctly to start with.

                t += 1

            log('[Attack] %d: completed' % i)

        utils.write_hdf5(self.args.perturbations_file, self.perturbations)
        log('[Attack] wrote %s' % self.args.perturbations_file)
        utils.write_hdf5(self.args.success_file, self.success)
        log('[Attack] wrote %s' % self.args.success_file)
        utils.write_hdf5(self.args.probabilities_file, self.probabilities)
        log('[Attack] wrote %s' % self.args.probabilities_file)

    def attack(self):
        """
        Attack the model over the latent parameters theta.
        """

        assert self.model is not None
        assert self.model.classifier.training is False

        concatenate_axis = -1
        if os.path.exists(self.args.perturbations_file) and os.path.exists(self.args.success_file):
            self.original_perturbations = utils.read_hdf5(self.args.perturbations_file)
            assert len(self.original_perturbations.shape) == 3, self.original_perturbations.shape
            log('[Attack] read %s' % self.args.perturbations_file)

            self.original_success = utils.read_hdf5(self.args.success_file)
            log('[Attack] read %s' % self.args.success_file)

            assert self.original_perturbations.shape[0] == self.original_success.shape[0]
            assert self.original_perturbations.shape[1] == self.original_success.shape[1]
            assert self.original_perturbations.shape[2] == self.test_theta.shape[1]

            if self.original_perturbations.shape[1] >= self.args.max_samples and self.original_perturbations.shape[0] >= self.args.max_attempts:
                log('[Attack] found %d attempts, %d samples, requested no more' % (self.original_perturbations.shape[0], self.original_perturbations.shape[1]))
                return
            elif self.original_perturbations.shape[0] == self.args.max_attempts or self.original_perturbations.shape[1] == self.args.max_samples:
                if self.original_perturbations.shape[0] == self.args.max_attempts:
                    self.test_theta = self.test_theta[self.original_perturbations.shape[1]:]
                    self.test_fonts = self.test_fonts[self.original_perturbations.shape[1]:]
                    self.test_classes = self.test_classes[self.original_perturbations.shape[1]:]
                    self.args.max_samples = self.args.max_samples - self.original_perturbations.shape[1]
                    concatenate_axis = 1
                    log('[Attack] found %d attempts with %d perturbations, computing %d more perturbations' % (
                        self.original_perturbations.shape[0], self.original_perturbations.shape[1], self.args.max_samples))
                elif self.original_perturbations.shape[1] == self.args.max_samples:
                    self.args.max_attempts = self.args.max_attempts - self.original_perturbations.shape[0]
                    concatenate_axis = 0
                    log('[Attack] found %d attempts with %d perturbations, computing %d more attempts' % (
                        self.original_perturbations.shape[0], self.original_perturbations.shape[1], self.args.max_attempts))

        self.perturbations = numpy.zeros((self.args.max_attempts, self.args.max_samples, self.test_theta.shape[1]))
        self.success = numpy.ones((self.args.max_attempts, self.args.max_samples), dtype=int) * -1

        if self.args.attack.find('Batch') >= 0:
            batch_size = min(self.args.batch_size, self.args.max_samples)
        else:
            batch_size = 1

        objective = self.objective_class()
        num_batches = int(math.ceil(self.args.max_samples/batch_size))

        for i in range(num_batches):
            if i*batch_size == self.args.max_samples:
                break

            i_start = i * batch_size
            i_end = min((i + 1) * batch_size, self.args.max_samples)

            batch_fonts = self.test_fonts[i_start: i_end]
            batch_classes = self.test_classes[i_start: i_end]
            batch_code = numpy.concatenate((common.numpy.one_hot(batch_fonts, self.N_font), common.numpy.one_hot(batch_classes, self.N_class)), axis=1).astype(numpy.float32)

            batch_classes = common.torch.as_variable(batch_classes, self.args.use_gpu)
            batch_inputs = common.torch.as_variable(self.test_theta[i_start: i_end], self.args.use_gpu)
            batch_code = common.torch.as_variable(batch_code, self.args.use_gpu)

            t = 0
            # This allows the attack to optimize only over theta, keeping the font/class code fixed.
            self.model.decoder.set_code(batch_code)

            while t < self.args.max_attempts:
                attack = self.setup_attack(batch_inputs, batch_classes)
                success, perturbations, probabilities, norm, _ = attack.run(objective)
                assert not numpy.any(perturbations != perturbations), perturbations

                # Note that we save the perturbed theta, not only the perturbation!
                perturbations = perturbations.reshape(batch_inputs.size())  # hack for the case of a one-dimensional latent space!
                self.perturbations[t][i_start: i_end] = perturbations + batch_inputs.cpu().numpy()
                self.success[t][i_start: i_end] = success
                t += 1

            log('[Attack] %d: completed' % i)

        if concatenate_axis >= 0:
            if self.perturbations.shape[0] == self.args.max_attempts:
                self.perturbations = numpy.concatenate((self.original_perturbations, self.perturbations), axis=concatenate_axis)
                self.success = numpy.concatenate((self.original_success, self.success), axis=concatenate_axis)
                log('[Attack] concatenated')

        utils.write_hdf5(self.args.perturbations_file, self.perturbations)
        log('[Attack] wrote %s' % self.args.perturbations_file)
        utils.write_hdf5(self.args.success_file, self.success)
        log('[Attack] wrote %s' % self.args.success_file)
Example #27
0
    def main(self):
        """
        Main method.
        """

        database = utils.read_hdf5(self.args.database_file)
        log('[Data] read %s' % self.args.database_file)

        # Sizes of the one-hot code components (fonts and classes).
        N_fonts = database.shape[0]
        N_classes = database.shape[1]
        N = N_fonts * N_classes

        #
        # Fonts and codes are created in the following way (example for 10 classes):
        #
        # font class
        # 0    0
        # 0    1
        # ...
        # 0    9
        # 1    0
        # 1    1
        # ...
        # 1    9
        #
        # This scheme is then repeated according to the multiplier.
        # The advantage of this scheme is that a balanced subset can be selected
        # in multiples of 10; a small verification sketch follows this method.
        #

        codes_fonts = numpy.expand_dims(
            numpy.repeat(numpy.arange(N_fonts), N_classes, axis=0), axis=1)
        codes_classes = numpy.expand_dims(
            numpy.tile(numpy.arange(N_classes), N_fonts), axis=1)
        codes = numpy.concatenate(
            (numpy.expand_dims(numpy.arange(N), axis=1), codes_fonts,
             codes_classes),
            axis=1)
        codes = numpy.tile(codes, (self.args.multiplier, 1))

        N_theta = self.args.number_transformations
        theta = numpy.zeros((self.args.multiplier * N, N_theta))

        assert N_theta > 0
        if N_theta > 0:  # translation x
            theta[:, 0] = numpy.random.uniform(self.args.min_translation,
                                               self.args.max_translation,
                                               size=(self.args.multiplier * N))
        if N_theta > 1:  # translation y
            theta[:, 1] = numpy.random.uniform(self.args.min_translation,
                                               self.args.max_translation,
                                               size=(self.args.multiplier * N))
        if N_theta > 2:  # shear x
            theta[:, 2] = numpy.random.uniform(self.args.min_shear,
                                               self.args.max_shear,
                                               size=(self.args.multiplier * N))
        if N_theta > 3:  # shear y
            theta[:, 3] = numpy.random.uniform(self.args.min_shear,
                                               self.args.max_shear,
                                               size=(self.args.multiplier * N))
        if N_theta > 4:  # scale
            theta[:, 4] = numpy.random.uniform(self.args.min_scale,
                                               self.args.max_scale,
                                               size=(self.args.multiplier * N))
        if N_theta > 5:  # rotation
            theta[:, 5] = numpy.random.uniform(self.args.min_rotation,
                                               self.args.max_rotation,
                                               size=(self.args.multiplier * N))
        if N_theta > 6:  # color (first channel)
            theta[:, 6] = numpy.random.uniform(self.args.min_color,
                                               1,
                                               size=(self.args.multiplier * N))
        if N_theta > 7:  # color (second channel)
            theta[:, 7] = numpy.random.uniform(self.args.min_color,
                                               1,
                                               size=(self.args.multiplier * N))
        if N_theta > 8:  # color (third channel)
            theta[:, 8] = numpy.random.uniform(self.args.min_color,
                                               1,
                                               size=(self.args.multiplier * N))

        utils.write_hdf5(self.args.codes_file, codes)
        log('[Data] wrote %s' % self.args.codes_file)
        utils.write_hdf5(self.args.theta_file, theta)
        log('[Data] wrote %s' % self.args.theta_file)
Example #28
0
    def attack(self):
        """
        Attack the model on the test images.
        """

        assert self.model is not None
        assert self.model.training is False
        assert self.test_images.shape[0] == self.test_codes.shape[0], 'number of samples has to match'

        concatenate_axis = -1
        if os.path.exists(self.args.perturbations_file) and os.path.exists(self.args.success_file):
            self.original_perturbations = utils.read_hdf5(self.args.perturbations_file)
            if self.test_images.shape[3] > 1:
                assert len(self.original_perturbations.shape) == 5
            else:
                assert len(self.original_perturbations.shape) == 4
            log('[Attack] read %s' % self.args.perturbations_file)

            self.original_success = utils.read_hdf5(self.args.success_file)
            log('[Attack] read %s' % self.args.success_file)

            assert self.original_perturbations.shape[0] == self.original_success.shape[0]
            assert self.original_perturbations.shape[1] == self.original_success.shape[1]
            assert self.original_perturbations.shape[2] == self.test_images.shape[1]
            assert self.original_perturbations.shape[3] == self.test_images.shape[2]

            if self.original_perturbations.shape[1] >= self.args.max_samples and self.original_perturbations.shape[0] >= self.args.max_attempts:
                log('[Attack] found %d attempts, %d samples, requested no more' % (self.original_perturbations.shape[0], self.original_perturbations.shape[1]))
                return
            elif self.original_perturbations.shape[0] == self.args.max_attempts or self.original_perturbations.shape[1] == self.args.max_samples:
                if self.original_perturbations.shape[0] == self.args.max_attempts:
                    self.test_images = self.test_images[self.original_perturbations.shape[1]:]
                    self.test_codes = self.test_codes[self.original_perturbations.shape[1]:]
                    self.args.max_samples = self.args.max_samples - self.original_perturbations.shape[1]
                    concatenate_axis = 1
                    log('[Attack] found %d attempts with %d perturbations, computing %d more perturbations' % (self.original_perturbations.shape[0], self.original_perturbations.shape[1], self.args.max_samples))
                elif self.original_perturbations.shape[1] == self.args.max_samples:
                    self.args.max_attempts = self.args.max_attempts - self.original_perturbations.shape[0]
                    concatenate_axis = 0
                    log('[Attack] found %d attempts with %d perturbations, computing %d more attempts' % (self.original_perturbations.shape[0], self.original_perturbations.shape[1], self.args.max_attempts))

        # Color images keep their channel dimension, so we cannot squeeze here!
        if self.test_images.shape[3] > 1:
            self.perturbations = numpy.zeros((self.args.max_attempts, self.args.max_samples, self.test_images.shape[1], self.test_images.shape[2], self.test_images.shape[3]))
        else:
            self.perturbations = numpy.zeros((self.args.max_attempts, self.args.max_samples, self.test_images.shape[1], self.test_images.shape[2]))
        self.success = numpy.ones((self.args.max_attempts, self.args.max_samples), dtype=int) * -1

        if self.args.attack.find('Batch') >= 0:
            batch_size = min(self.args.batch_size, self.args.max_samples)
        else:
            batch_size = 1

        objective = self.objective_class()
        num_batches = int(math.ceil(self.args.max_samples/batch_size))

        for i in range(num_batches):
            if i*batch_size == self.args.max_samples:
                break
                
            i_start = i*batch_size
            i_end = min((i+1)*batch_size, self.args.max_samples)

            batch_images = common.torch.as_variable(self.test_images[i_start: i_end], self.args.use_gpu)
            batch_classes = common.torch.as_variable(numpy.array(self.test_codes[i_start: i_end]), self.args.use_gpu)
            batch_images = batch_images.permute(0, 3, 1, 2)

            t = 0
            while t < self.args.max_attempts:
                attack = self.setup_attack(batch_images, batch_classes)
                success, perturbations, probabilities, norm, _ = attack.run(objective)
                assert not numpy.any(perturbations != perturbations), perturbations

                # Note that we save the perturbed image, not only the perturbation!
                self.perturbations[t][i_start: i_end] = numpy.squeeze(numpy.transpose(perturbations + batch_images.cpu().numpy(), (0, 2, 3, 1)))
                self.success[t][i_start: i_end] = success

                # IMPORTANT: The adversarial examples do not take into account
                # whether the classifier classified the sample correctly to start with.

                t += 1

            log('[Attack] %d: completed' % i)

        if concatenate_axis >= 0:
            if self.perturbations.shape[0] == self.args.max_attempts:
                self.perturbations = numpy.concatenate((self.original_perturbations, self.perturbations), axis=concatenate_axis)
                self.success = numpy.concatenate((self.original_success, self.success), axis=concatenate_axis)
                log('[Attack] concatenated')

        utils.write_hdf5(self.args.perturbations_file, self.perturbations)
        log('[Attack] wrote %s' % self.args.perturbations_file)
        utils.write_hdf5(self.args.success_file, self.success)
        log('[Attack] wrote %s' % self.args.success_file)