示例#1
0
    def test_test(self):
        """
        Test on testing set.

        Runs the encoder and decoder over ``self.test_images`` batch by batch
        and accumulates into ``self.reconstruction_error``,
        ``self.code_mean``, ``self.code_var``, ``self.pred_images`` and
        ``self.pred_codes``. Reconstructions and codes are written to HDF5
        when ``self.args.reconstruction_file`` / ``self.args.test_theta_file``
        are set. Afterwards the threshold on the per-sample maximum absolute
        code value is raised in steps of 0.1 (first tested value: 1.0) until
        at least 90% of the samples lie below it, and scatter plots of code
        dimension 0 against every other dimension are written when an output
        directory is configured and ``utils.display()`` is truthy.

        NOTE(review): the accumulators are only added to here, never reset;
        presumably they are initialised by the caller -- confirm.
        """

        num_samples = self.test_images.shape[0]
        num_batches = int(math.ceil(num_samples / self.args.batch_size))

        for b in range(num_batches):
            b_start = b * self.args.batch_size
            b_end = min((b + 1) * self.args.batch_size, num_samples)

            batch_images = common.torch.as_variable(
                self.test_images[b_start:b_end], self.args.use_gpu)
            # Move the channel axis from last to second position.
            batch_images = batch_images.permute(0, 3, 1, 2)

            # Important to get the correct codes!
            output_codes, _ = self.encoder(batch_images)
            output_images = self.decoder(output_codes)
            e = self.reconstruction_loss(batch_images, output_images)
            self.reconstruction_error += e.data

            self.code_mean += torch.mean(output_codes).item()
            self.code_var += torch.var(output_codes).item()

            # Back to channel-last layout for storage.
            output_images = numpy.transpose(
                output_images.cpu().detach().numpy(), (0, 2, 3, 1))
            # Only drop a singleton channel axis. A bare squeeze would also
            # drop the batch axis whenever the final batch holds exactly one
            # sample, so that batch would be concatenated with the wrong rank.
            if output_images.shape[3] == 1:
                output_images = numpy.squeeze(output_images, axis=3)
            self.pred_images = common.numpy.concatenate(
                self.pred_images, output_images)

            output_codes = output_codes.cpu().detach().numpy()
            self.pred_codes = common.numpy.concatenate(self.pred_codes,
                                                       output_codes)

            if b % 100 == 50:
                log('[Testing] %d' % b)

        assert self.pred_images.shape[0] == self.test_images.shape[
            0], 'computed invalid number of test images'
        if self.args.reconstruction_file:
            utils.write_hdf5(self.args.reconstruction_file, self.pred_images)
            log('[Testing] wrote %s' % self.args.reconstruction_file)

        if self.args.test_theta_file:
            assert self.pred_codes.shape[0] == self.test_images.shape[
                0], 'computed invalid number of test codes'
            utils.write_hdf5(self.args.test_theta_file, self.pred_codes)
            log('[Testing] wrote %s' % self.args.test_theta_file)

        # Per-sample infinity norm of the codes.
        threshold = 0.9
        percentage = 0
        values = numpy.max(numpy.abs(self.pred_codes), axis=1)

        # Raise the threshold until at least 90% of the samples fall below it.
        while percentage < 0.9:
            threshold += 0.1
            percentage = numpy.sum(values <= threshold) / float(
                values.shape[0])
            log('[Testing] threshold %g percentage %g' %
                (threshold, percentage))
        log('[Testing] taking threshold %g with percentage %g' %
            (threshold, percentage))

        if self.args.output_directory and utils.display():
            # Plot dimension 0 against every other code dimension, labelling
            # each sample by whether it lies within the chosen threshold.
            for d in range(1, self.pred_codes.shape[1]):
                plot_file = os.path.join(self.args.output_directory,
                                         'test_codes_%s' % d)
                plot.scatter(
                    plot_file,
                    self.pred_codes[:, 0],
                    self.pred_codes[:, d], (values <= threshold).astype(int),
                    ['greater %g' % threshold,
                     'smaller %g' % threshold],
                    title='Dimensions 0 and %d of Test Codes' % d)
                log('[Testing] wrote %s' % plot_file)

        self.reconstruction_error /= num_batches
        log('[Testing] reconstruction error %g' % self.reconstruction_error)
    def visualize_perturbations(self):
        """
        Visualize perturbations.

        Iterates over at most the first 1000 rows of ``self.perturbations``
        and logs each one. A sample is rendered only when at least one entry
        of ``self.success[i]`` is non-negative and ``self.accuracy[i]`` is
        truthy. For up to six attempts per rendered sample, three PNG files
        are written to ``self.args.output_directory``: the test image, the
        attacked image, and the image perturbation scaled by its maximum
        absolute value. The loop stops once more than 200 samples have been
        rendered.
        """

        attempts = min(self.perturbations.shape[1], 6)
        utils.makedir(self.args.output_directory)

        rendered = 0
        limit = min(1000, self.perturbations.shape[0])
        for i in range(limit):
            iterations = ' '.join(map(str, self.success[i]))
            log('[Visualization] sample %d, iterations %s and correctly classified: %s'
                % (i + 1, iterations, self.accuracy[i]))

            # Skip samples without any successful attempt or with a falsy
            # accuracy entry.
            if not (numpy.any(self.success[i] >= 0) and self.accuracy[i]):
                continue
            if rendered > 200:
                break

            for j in range(attempts):
                # Difference in theta space between attack and original.
                clean_theta = self.test_theta[i]
                attacked_theta = self.perturbations[i][j]
                theta_delta = attacked_theta - clean_theta

                # Difference in image space between attack and original.
                clean_image = self.test_images[i]
                attacked_image = self.perturbation_images[i][j]
                image_delta = attacked_image - clean_image

                # Scale both differences by their maximum absolute value.
                theta_scale = numpy.max(numpy.abs(theta_delta))
                theta_delta /= theta_scale

                image_scale = numpy.max(numpy.abs(image_delta))
                image_delta /= image_scale

                # The arg-max of each representation is used as the label
                # embedded in the file names.
                clean_label = numpy.argmax(self.theta_representations[i])
                attacked_label = numpy.argmax(
                    self.perturbation_representations[i][j])

                clean_path = os.path.join(
                    self.args.output_directory,
                    '%d_%d_image_%d.png' % (i, j, clean_label))
                attacked_path = os.path.join(
                    self.args.output_directory,
                    '%d_%d_attack_%d.png' % (i, j, attacked_label))
                delta_path = os.path.join(
                    self.args.output_directory,
                    '%d_%d_perturbation_%g.png' % (i, j, image_scale))

                vis.image(clean_path, clean_image, scale=10)
                vis.image(attacked_path, attacked_image, scale=10)
                vis.perturbation(delta_path, image_delta, scale=10)

            rendered += 1
    def visualize_perturbations(self):
        """
        Visualize perturbations.

        Iterates over at most the first 1000 rows of ``self.perturbations``.
        A sample is rendered only when ``self.success[i]`` has a truthy entry
        and ``self.accuracy[i]`` is truthy. For up to six attempts per
        rendered sample, PNG files are written to
        ``self.args.output_directory``: the test image, the attacked image,
        and the perturbation scaled by its maximum absolute value. When the
        perturbation has more than two dimensions, its L2 norm along axis 2
        is written as well, again scaled by its maximum. The loop stops once
        more than 200 samples have been rendered.
        """

        attempts = min(self.perturbations.shape[1], 6)
        utils.makedir(self.args.output_directory)

        rendered = 0
        limit = min(1000, self.perturbations.shape[0])
        for i in range(limit):
            # Skip samples without any successful attempt or with a falsy
            # accuracy entry.
            if not (numpy.any(self.success[i]) and self.accuracy[i]):
                continue
            if rendered > 200:
                break

            for j in range(attempts):
                clean_image = self.test_images[i]
                attacked_image = self.perturbations[i][j]

                # Scale the difference by its maximum absolute value.
                delta = attacked_image - clean_image
                delta_scale = numpy.max(numpy.abs(delta))
                delta /= delta_scale

                # The arg-max of each representation is used as the label
                # embedded in the file names.
                clean_label = numpy.argmax(self.image_representations[i])
                attacked_label = numpy.argmax(
                    self.perturbation_representations[i][j])

                clean_path = os.path.join(
                    self.args.output_directory,
                    '%d_%d_image_%d.png' % (i, j, clean_label))
                attacked_path = os.path.join(
                    self.args.output_directory,
                    '%d_%d_attack_%d.png' % (i, j, attacked_label))
                delta_path = os.path.join(
                    self.args.output_directory,
                    '%d_%d_perturbation_%g.png' % (i, j, delta_scale))

                vis.image(clean_path, clean_image, scale=10)
                vis.image(attacked_path, attacked_image, scale=10)
                vis.perturbation(delta_path, delta, scale=10)

                if len(delta.shape) <= 2:
                    continue

                # Collapse axis 2 into a per-pixel L2 magnitude and scale it
                # by its maximum as well.
                magnitude = numpy.linalg.norm(delta, ord=2, axis=2)
                magnitude_scale = numpy.max(numpy.abs(magnitude))
                magnitude /= magnitude_scale

                magnitude_path = os.path.join(
                    self.args.output_directory,
                    '%d_%d_perturbation_magnitude_%g.png' %
                    (i, j, magnitude_scale))
                vis.perturbation(magnitude_path, magnitude, scale=10)

            rendered += 1
示例#4
0
    def load_data(self):
        """
        Load data.

        Reads the training/test images, label codes and theta codes from the
        HDF5 files named in ``self.args`` and stores them on ``self``
        together with derived values:

        * ``resolution``: size of axis 2 of the test images;
        * ``N_class``: largest training label plus one;
        * ``min_bound`` / ``max_bound``: per-dimension minimum and maximum of
          the full training theta (computed before any subsetting);
        * ``val_images`` / ``val_codes``: the last
          ``args.validation_samples`` training entries, only when
          ``args.early_stopping`` is set;
        * ``train_valid`` / ``test_valid``: 1 where the maximum absolute
          theta value of a sample is at most ``args.bound``, else 0.

        ``args.training_samples`` and ``args.test_samples`` are clamped to
        the number of available samples (negative values select all), and a
        warning is logged for every class whose sample count differs from
        the balanced count.
        """

        assert self.args.batch_size % 4 == 0

        self.train_images = utils.read_hdf5(
            self.args.train_images_file).astype(numpy.float32)
        log('[Training] read %s' % self.args.train_images_file)

        self.test_images = utils.read_hdf5(self.args.test_images_file).astype(
            numpy.float32)
        log('[Training] read %s' % self.args.test_images_file)

        # For handling both color and gray images.
        if len(self.train_images.shape) < 4:
            self.train_images = numpy.expand_dims(self.train_images, axis=3)
            self.test_images = numpy.expand_dims(self.test_images, axis=3)
            log('[Training] no color images, adjusted size')
        self.resolution = self.test_images.shape[2]
        log('[Training] resolution %d' % self.resolution)

        # The builtin ``int`` is what the removed ``numpy.int`` alias
        # referred to, so the resulting dtype is unchanged.
        self.train_codes = utils.read_hdf5(self.args.train_codes_file).astype(
            int)
        assert self.train_codes.shape[1] >= self.args.label_index + 1
        self.train_codes = self.train_codes[:, self.args.label_index]
        log('[Training] read %s' % self.args.train_codes_file)
        self.N_class = numpy.max(self.train_codes) + 1

        self.test_codes = utils.read_hdf5(self.args.test_codes_file).astype(
            int)
        assert self.test_codes.shape[1] >= self.args.label_index + 1
        self.test_codes = self.test_codes[:, self.args.label_index]
        log('[Training] read %s' % self.args.test_codes_file)

        self.train_theta = utils.read_hdf5(self.args.train_theta_file).astype(
            numpy.float32)
        log('[Training] read %s' % self.args.train_theta_file)

        assert self.test_images.shape[0] == self.test_codes.shape[0]

        self.min_bound = numpy.min(self.train_theta, axis=0)
        self.max_bound = numpy.max(self.train_theta, axis=0)
        log('[Training] min bound: %s' % ' '.join(
            ['%g' % self.min_bound[i]
             for i in range(self.min_bound.shape[0])]))
        log('[Training] max bound: %s' % ' '.join(
            ['%g' % self.max_bound[i]
             for i in range(self.max_bound.shape[0])]))

        self.test_theta = utils.read_hdf5(self.args.test_theta_file).astype(
            numpy.float32)
        log('[Training] read %s' % self.args.test_theta_file)

        assert self.train_codes.shape[0] == self.train_images.shape[0]
        assert self.train_theta.shape[
            0] == self.train_images.shape[0], '%s != %s' % ('x'.join(
                list(map(str, self.train_theta.shape))), 'x'.join(
                    list(map(str, self.train_images.shape))))
        assert self.test_theta.shape[0] == self.test_images.shape[0]

        # Select subset of samples
        if self.args.training_samples < 0:
            self.args.training_samples = self.train_images.shape[0]
        else:
            self.args.training_samples = min(self.args.training_samples,
                                             self.train_images.shape[0])
        log('[Training] using %d training samples' %
            self.args.training_samples)

        if self.args.test_samples < 0:
            self.args.test_samples = self.test_images.shape[0]
        else:
            self.args.test_samples = min(self.args.test_samples,
                                         self.test_images.shape[0])

        if self.args.early_stopping:
            assert self.args.validation_samples > 0
            assert self.args.training_samples + self.args.validation_samples <= self.train_images.shape[
                0]
            # Hold out the trailing validation_samples entries. train_theta
            # is left untouched here and is only subset further below.
            self.val_images = self.train_images[self.train_images.shape[0] -
                                                self.args.validation_samples:]
            self.val_codes = self.train_codes[self.train_codes.shape[0] -
                                              self.args.validation_samples:]
            self.train_images = self.train_images[:self.train_images.shape[0] -
                                                  self.args.validation_samples]
            self.train_codes = self.train_codes[:self.train_codes.shape[0] -
                                                self.args.validation_samples]
            assert self.val_images.shape[
                0] == self.args.validation_samples and self.val_codes.shape[
                    0] == self.args.validation_samples

        if self.args.random_samples:
            # Pick random blocks of 10 consecutive samples and take the first
            # N_class entries of each chosen block.
            # NOTE(review): presumably the data is stored in blocks of 10
            # with one sample per class -- confirm for N_class != 10.
            perm = numpy.random.permutation(self.train_images.shape[0] // 10)
            perm = perm[:self.args.training_samples // 10]
            perm = numpy.repeat(perm, self.N_class, axis=0) * 10 + numpy.tile(
                numpy.array(range(self.N_class)), (perm.shape[0]))
            self.train_images = self.train_images[perm]
            self.train_codes = self.train_codes[perm]
            self.train_theta = self.train_theta[perm]
        else:
            self.train_images = self.train_images[:self.args.training_samples]
            self.train_codes = self.train_codes[:self.args.training_samples]
            self.train_theta = self.train_theta[:self.args.training_samples]

        # Flag samples whose theta lies inside the [-bound, bound] box.
        self.train_valid = (numpy.max(numpy.abs(self.train_theta), axis=1) <=
                            self.args.bound).astype(int)
        self.test_valid = (numpy.max(numpy.abs(self.test_theta), axis=1) <=
                           self.args.bound).astype(int)

        # Check that the dataset is balanced.
        number_samples = self.train_codes.shape[0] // self.N_class
        for c in range(self.N_class):
            number_samples_ = numpy.sum(self.train_codes == c)
            if number_samples_ != number_samples:
                log(
                    '[Training] dataset not balanced, class %d should have %d samples but has %d'
                    % (c, number_samples, number_samples_), LogLevel.WARNING)
示例#5
0
    def test(self):
        """
        Test the model.

        Evaluates ``self.model`` on ``self.test_images`` / ``self.test_codes``
        batch by batch. Stores the mean cross-entropy loss in ``self.loss``
        and the misclassification rate in ``self.error``, both averaged over
        samples so that a smaller final batch is not over-weighted. Turns
        ``self.accuracy`` into a boolean array that is ``True`` where the
        prediction matches the label. The accuracy array and a results
        dictionary are written to disk when ``self.args.accuracy_file`` /
        ``self.args.results_file`` are set.

        NOTE(review): ``self.accuracy`` is appended to and never reset here;
        presumably it is initialised before this method runs -- confirm.
        """

        assert self.model is not None
        assert self.model.training is False
        assert self.test_images.shape[0] == self.test_codes.shape[
            0], 'number of samples have to match'

        self.loss = 0.
        self.error = 0.
        num_samples = self.test_images.shape[0]
        num_batches = int(math.ceil(num_samples / self.args.batch_size))

        for b in range(num_batches):
            b_start = b * self.args.batch_size
            b_end = min((b + 1) * self.args.batch_size, num_samples)
            batch_images = common.torch.as_variable(
                self.test_images[b_start:b_end], self.args.use_gpu)
            batch_classes = common.torch.as_variable(
                self.test_codes[b_start:b_end], self.args.use_gpu)
            # Move the channel axis from last to second position.
            batch_images = batch_images.permute(0, 3, 1, 2)

            output_classes = self.model(batch_images)
            # Mean reduction is the default; the deprecated size_average
            # flag is no longer passed.
            e = torch.nn.functional.cross_entropy(output_classes,
                                                  batch_classes)
            # Weight the batch mean by the batch size so the final division
            # yields a true per-sample average.
            self.loss += e.item() * (b_end - b_start)

            _, indices = torch.max(torch.nn.functional.softmax(
                output_classes, dim=1),
                                   dim=1)
            # Non-zero wherever the predicted class differs from the label.
            errors = torch.abs(indices - batch_classes)
            self.error += torch.sum(errors > 0).item()

            self.accuracy = common.numpy.concatenate(self.accuracy,
                                                     errors.data.cpu().numpy())

        self.loss /= num_samples
        self.error /= num_samples
        log('[Testing] test loss %g; test error %g' % (self.loss, self.error))

        self.accuracy = self.accuracy == 0
        if self.args.accuracy_file:
            utils.write_hdf5(self.args.accuracy_file, self.accuracy)
            log('[Testing] wrote %s' % self.args.accuracy_file)

        # Consistency check between the stored accuracy array and the error.
        accuracy = numpy.sum(self.accuracy) / self.accuracy.shape[0]
        if numpy.abs(1 - accuracy - self.error) < 1e-4:
            log('[Testing] accuracy file is with %g accuracy correct' %
                accuracy)

        self.results = {
            'loss': self.loss,
            'error': self.error,
        }
        if self.args.results_file:
            utils.write_pickle(self.args.results_file, self.results)
            log('[Testing] wrote %s' % self.args.results_file)