Example No. 1
def calculate_lp_distance(original_image, compared_image):
    """
    Calculate l0, l2 and linf distance for two images with the same shape.

    Args:
        original_image (np.ndarray): Original image.
        compared_image (np.ndarray): Another image for comparison.

    Returns:
        - float, l0 distance between the two images.

        - float, l2 distance between the two images.

        - float, linf distance between the two images.

    Raises:
        TypeError: If type of original_image or type of compared_image is not numpy.ndarray.
        ValueError: If the shape of original_image and compared_image are not the same.
    """
    check_numpy_param('original_image', original_image)
    check_numpy_param('compared_image', compared_image)
    check_equal_shape('original_image', original_image, 'compared_image',
                      compared_image)
    avoid_zero_div = 1e-14
    diff = (original_image - compared_image).flatten()
    data = original_image.flatten()
    l0_dist = np.linalg.norm(diff, ord=0) \
               / (np.linalg.norm(data, ord=0) + avoid_zero_div)
    l2_dist = np.linalg.norm(diff, ord=2) \
               / (np.linalg.norm(data, ord=2) + avoid_zero_div)
    linf_dist = np.linalg.norm(diff, ord=np.inf) \
                 / (np.linalg.norm(data, ord=np.inf) + avoid_zero_div)
    return l0_dist, l2_dist, linf_dist
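A minimal usage sketch for the function above, with stub versions of the check_* helpers so the snippet runs standalone; the real library ships its own validated implementations, and the arrays here are purely illustrative:

import numpy as np

# Stub validation helpers (illustrative; the library provides the real ones).
def check_numpy_param(name, value):
    if not isinstance(value, np.ndarray):
        raise TypeError('{} must be a numpy.ndarray.'.format(name))
    return value

def check_equal_shape(name_a, value_a, name_b, value_b):
    if value_a.shape != value_b.shape:
        raise ValueError('{} and {} must share a shape.'.format(name_a, name_b))
    return value_a, value_b

original = np.random.random((32, 32, 3))
perturbed = original + np.random.normal(scale=0.01, size=original.shape)
l0, l2, linf = calculate_lp_distance(original, perturbed)
# l0 is the count of changed entries relative to the count of nonzero
# entries in the original; l2 and linf are perturbation-to-image norm ratios.
print(l0, l2, linf)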
Example No. 2
    def _dist(self, before, after):
        """
        Calculate the distance between the model outputs of a raw sample and
            its smoothed counterpart.

        Args:
            before (numpy.ndarray): Model output of raw samples.
            after (numpy.ndarray): Model output of smoothed counterparts.

        Returns:
            list of float, distances based on the specified norm, one per sample.
        """
        before, after = check_pair_numpy_param('before', before, 'after',
                                               after)
        before, after = check_equal_shape('before', before, 'after', after)
        res = []
        diff = after - before
        for elem in diff:
            if self._metric == 'l1':
                res.append(np.linalg.norm(elem, ord=1))
            elif self._metric == 'l2':
                res.append(np.linalg.norm(elem, ord=2))
            else:
                # default to the linf norm for any other metric value
                res.append(np.linalg.norm(elem, ord=np.inf))
        return res

    def fit(self, inputs, labels=None):
        """
        Train detector to decide the best radius.

        Args:
            inputs (numpy.ndarray): Benign samples.
            labels (numpy.ndarray): Ground truth labels of the input samples.
                Default: None.

        Returns:
            float, the best radius.
        """
        inputs, labels = check_pair_numpy_param('inputs', inputs, 'labels',
                                                labels)
        LOGGER.debug(TAG, 'enter fit() function.')
        time_start = time.time()
        search_iters = (self._max_radius -
                        self._initial_radius) / self._search_step
        search_iters = np.round(search_iters).astype(int)
        radius = self._initial_radius
        pred = self._model.predict(Tensor(inputs))
        raw_preds = np.argmax(pred.asnumpy(), axis=1)
        if not self._sparse:
            labels = np.argmax(labels, axis=1)
        raw_preds, labels = check_equal_shape('raw_preds', raw_preds, 'labels',
                                              labels)
        raw_acc = np.sum(raw_preds == labels) / inputs.shape[0]

        for _ in range(search_iters):
            rc_preds = self._rc_forward(inputs, radius)
            rc_preds, labels = check_equal_shape('rc_preds', rc_preds,
                                                 'labels', labels)
            def_acc = np.sum(rc_preds == labels) / inputs.shape[0]
            if def_acc >= raw_acc - self._degrade_limit:
                radius += self._search_step
                continue
            break

        self._radius = radius - self._search_step
        LOGGER.debug(TAG, 'best radius is: %s', self._radius)
        LOGGER.debug(
            TAG, 'time used to train detector of %d samples is: %s seconds',
            inputs.shape[0],
            time.time() - time_start)
        return self._radius
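The search in fit() is a plain linear scan: keep growing the smoothing radius while the defended accuracy stays within _degrade_limit of the raw accuracy, then back off one step. A self-contained sketch of that loop, with a toy accuracy curve standing in for the model (all names below are illustrative):

import numpy as np

def search_best_radius(initial_radius, max_radius, search_step,
                       raw_acc, degrade_limit, acc_at):
    """Mirror of the scan in fit(); acc_at(radius) -> defended accuracy."""
    search_iters = int(np.round((max_radius - initial_radius) / search_step))
    radius = initial_radius
    for _ in range(search_iters):
        if acc_at(radius) >= raw_acc - degrade_limit:
            radius += search_step
            continue
        break
    return radius - search_step

# Toy curve: defended accuracy decays linearly as the radius grows.
best = search_best_radius(0.0, 1.0, 0.1, raw_acc=0.95, degrade_limit=0.05,
                          acc_at=lambda r: 0.95 - 0.2 * r)
print(best)  # ~0.2: the last radius that kept accuracy within the limit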
Example No. 4
    def _fast_reduction(self, x_ori, best_position, q_times, auxiliary_inputs,
                        gt_boxes, gt_labels, model):
        """
        Decrease the differences between the original samples and adversarial samples in a fast way.

        Args:
            x_ori (numpy.ndarray): Original samples.
            best_position (numpy.ndarray): Adversarial examples.
            q_times (int): Query times.
            auxiliary_inputs (tuple): Auxiliary inputs matched with x_ori.
            gt_boxes (numpy.ndarray): Ground-truth boxes of x_ori.
            gt_labels (numpy.ndarray): Ground-truth labels of x_ori.
            model (BlackModel): Target model.

        Returns:
            - numpy.ndarray, adversarial examples after reduction.

            - int, total query times after reduction.
        """
        LOGGER.info(TAG, 'Reduction begins...')
        model = check_model('model', model, BlackModel)
        x_ori = check_numpy_param('x_ori', x_ori)
        _, gt_num = self._detection_scores((x_ori, ) + auxiliary_inputs,
                                           gt_boxes, gt_labels, model)
        best_position = check_numpy_param('best_position', best_position)
        x_ori, best_position = check_equal_shape('x_ori', x_ori,
                                                 'best_position',
                                                 best_position)
        _, original_num = self._detection_scores(
            (best_position, ) + auxiliary_inputs, gt_boxes, gt_labels, model)
        # pylint: disable=invalid-name
        REDUCTION_ITERS = 6  # recover 10% of the difference each time, 60% in total
        for _ in range(REDUCTION_ITERS):
            BLOCK_NUM = 30  # divide the image into 30 segments
            block_width = best_position.shape[0] // BLOCK_NUM
            if block_width > 0:
                for i in range(BLOCK_NUM):
                    diff = x_ori[i*block_width: (i+1)*block_width, :, :]\
                           - best_position[i*block_width:(i+1)*block_width, :, :]
                    if np.max(np.abs(diff)) >= 0.1 * (self._bounds[1] -
                                                      self._bounds[0]):
                        res = diff * 0.1
                        best_position[i * block_width:(i + 1) *
                                      block_width, :, :] += res
                        _, correct_num = self._detection_scores(
                            (best_position, ) + auxiliary_inputs, gt_boxes,
                            gt_labels, model)
                        q_times += 1
                        if correct_num[0] > max(
                                original_num[0],
                                gt_num[0] * self._reserve_ratio):
                            best_position[i * block_width:(i + 1) *
                                          block_width, :, :] -= res
        return best_position, q_times
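The band arithmetic in _fast_reduction, stripped of the detector queries: the first image axis is cut into 30 bands, and any band whose residual still exceeds the 10% threshold is moved 10% of the way back toward the original (the full method then re-queries the model and undoes the move if detection recovers past its threshold). A hedged standalone sketch, names illustrative:

import numpy as np

def partial_revert(x_ori, adv, block_num=30, ratio=0.1, threshold=0.1):
    """Move each still-perturbed band of adv `ratio` of the way back to x_ori."""
    adv = adv.copy()
    block_width = adv.shape[0] // block_num
    if block_width == 0:
        return adv
    for i in range(block_num):
        band = slice(i * block_width, (i + 1) * block_width)
        diff = x_ori[band] - adv[band]
        if np.max(np.abs(diff)) >= threshold:  # band still far from original
            adv[band] += diff * ratio
    return adv

x_ori = np.zeros((60, 8, 3))
adv = x_ori + 0.5
print(np.max(np.abs(x_ori - partial_revert(x_ori, adv))))  # 0.45: 10% recovered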
Example No. 5
    def _reduction(x_ori, q_times, label, best_position, model,
                   targeted_attack):
        """
        Decrease the differences between the original samples and adversarial samples.

        Args:
            x_ori (numpy.ndarray): Original samples.
            q_times (int): Query times.
            label (int): Target label or ground-truth label.
            best_position (numpy.ndarray): Adversarial examples.
            model (BlackModel): Target model.
            targeted_attack (bool): If True, it means this is a targeted attack. If False,
                it means this is an untargeted attack.

        Returns:
            - numpy.ndarray, adversarial examples after reduction.

            - int, total query times after reduction.

        Examples:
            >>> adv_reduction, q_times = self._reduction([0.1, 0.2, 0.3], 20, 1,
            >>> [0.12, 0.15, 0.25], model, True)
        """
        LOGGER.info(TAG, 'Reduction begins...')
        model = check_model('model', model, BlackModel)
        x_ori = check_numpy_param('x_ori', x_ori)
        best_position = check_numpy_param('best_position', best_position)
        x_ori, best_position = check_equal_shape('x_ori', x_ori,
                                                 'best_position',
                                                 best_position)
        x_ori_fla = x_ori.flatten()
        best_position_fla = best_position.flatten()
        pixel_deep = np.max(x_ori) - np.min(x_ori)
        nums_pixel = len(x_ori_fla)
        for i in range(nums_pixel):
            diff = x_ori_fla[i] - best_position_fla[i]
            if abs(diff) > pixel_deep * 0.1:
                best_position_fla[i] += diff * 0.5
                cur_label = np.argmax(
                    model.predict(best_position_fla.reshape(x_ori.shape)))
                q_times += 1
                if targeted_attack:
                    if cur_label != label:
                        best_position_fla[i] -= diff * 0.5
                else:
                    if cur_label == label:
                        # revert only the pixel just changed, mirroring the targeted branch
                        best_position_fla[i] -= diff * 0.5
        return best_position_fla.reshape(x_ori.shape), q_times
Example No. 6
    def _reduction(self, x_ori, q_times, label, best_position):
        """
        Decrease the differences between the original samples and adversarial samples.

        Args:
            x_ori (numpy.ndarray): Original samples.
            q_times (int): Query times.
            label (int): Target label or ground-truth label.
            best_position (numpy.ndarray): Adversarial examples.

        Returns:
            - numpy.ndarray, adversarial examples after reduction.

            - int, total query times after reduction.

        Examples:
            >>> adv_reduction, q_times = self._reduction([0.1, 0.2, 0.3], 20, 1,
            >>> [0.12, 0.15, 0.25])
        """
        x_ori = check_numpy_param('x_ori', x_ori)
        best_position = check_numpy_param('best_position', best_position)
        x_ori, best_position = check_equal_shape('x_ori', x_ori,
                                                 'best_position',
                                                 best_position)
        x_ori_fla = x_ori.flatten()
        best_position_fla = best_position.flatten()
        pixel_deep = self._bounds[1] - self._bounds[0]
        nums_pixel = len(x_ori_fla)
        for i in range(nums_pixel):
            diff = x_ori_fla[i] - best_position_fla[i]
            if abs(diff) > pixel_deep * 0.1:
                old_poi_fla = np.copy(best_position_fla)
                best_position_fla[i] = np.clip(
                    best_position_fla[i] + diff * 0.5, self._bounds[0],
                    self._bounds[1])
                cur_label = np.argmax(
                    self._model.predict(
                        np.expand_dims(best_position_fla.reshape(x_ori.shape),
                                       axis=0))[0])
                q_times += 1
                if self._targeted:
                    if cur_label != label:
                        best_position_fla = old_poi_fla
                else:
                    if cur_label == label:
                        best_position_fla = old_poi_fla
        return best_position_fla.reshape(x_ori.shape), q_times
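Both _reduction variants implement the same greedy loop: for every pixel whose residual exceeds 10% of the value range, move it halfway back toward the original, re-query the model, and undo the move if the attack stops working. A runnable miniature with a mock predict function (the model and data are purely illustrative):

import numpy as np

def reduce_perturbation(x_ori, adv, label, predict, bounds=(0.0, 1.0)):
    """Greedy pixel-wise reduction for an untargeted attack."""
    x_fla = x_ori.flatten()
    adv_fla = adv.flatten().astype(float)
    pixel_deep = bounds[1] - bounds[0]
    q_times = 0
    for i in range(x_fla.size):
        diff = x_fla[i] - adv_fla[i]
        if abs(diff) > pixel_deep * 0.1:
            old_value = adv_fla[i]
            adv_fla[i] = np.clip(adv_fla[i] + diff * 0.5, *bounds)
            q_times += 1
            if np.argmax(predict(adv_fla.reshape(x_ori.shape))) == label:
                adv_fla[i] = old_value  # the step broke the attack: undo it
    return adv_fla.reshape(x_ori.shape), q_times

# Mock model: class 1 whenever the mean input exceeds 0.5, else class 0.
predict = lambda x: np.array([1.0 - x.mean(), x.mean()])
x_ori = np.full(4, 0.2)
adv = np.full(4, 0.9)  # drives the mock model to class 1 (the "attack")
reduced, queries = reduce_perturbation(x_ori, adv, label=0, predict=predict)
print(reduced, queries)  # pixels pulled to 0.55; the attack still succeeds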
Example No. 7
    def evaluate(self,
                 original_images,
                 inversion_images,
                 labels=None,
                 new_network=None):
        """
        Evaluate the quality of inverted images by three metrics: the average L2 distance and SSIM value between
        original images and inversion images, and the average confidence of the inverted images on their true
        labels, as inferred by a newly trained network.

        Args:
            original_images (numpy.ndarray): Original images, whose shape should be (img_num, channels, img_width,
                img_height).
            inversion_images (numpy.ndarray): Inversion images, whose shape should be (img_num, channels, img_width,
                img_height).
            labels (numpy.ndarray): Ground truth labels of original images. Default: None.
            new_network (Cell): A network whose structure contains all parts of self._network, but loaded with different
                checkpoint file. Default: None.

        Returns:
            - float, l2 distance.

            - float, average ssim value.

            - Union[float, None], average confidence. It would be None if labels or new_network is None.

        Examples:
            >>> net = LeNet5()
            >>> inversion_attack = ImageInversionAttack(net, input_shape=(1, 32, 32), input_bound=(0, 1),
            >>> loss_weights=[1, 0.2, 5])
            >>> features = np.random.random((2, 10)).astype(np.float32)
            >>> inver_images = inversion_attack.generate(features, iters=10)
            >>> ori_images = np.random.random((2, 1, 32, 32))
            >>> result = inversion_attack.evaluate(ori_images, inver_images)
            >>> print(len(result))
        """
        check_numpy_param('original_images', original_images)
        check_numpy_param('inversion_images', inversion_images)
        if labels is not None:
            check_numpy_param('labels', labels)
            true_labels = np.squeeze(labels)
            if len(true_labels.shape) > 1:
                msg = 'Shape of true_labels should be (1, n) or (n,), but got {}'.format(
                    true_labels.shape)
                raise ValueError(msg)
            if true_labels.size != original_images.shape[0]:
                msg = 'The size of true_labels should equal the number of images, but got {} and {}'.format(
                    true_labels.size, original_images.shape[0])
                raise ValueError(msg)
        if new_network is not None:
            check_param_type('new_network', new_network, Cell)
            LOGGER.info(
                TAG,
                'Please make sure that the network you pass is loaded with different checkpoint files '
                'compared with that of self._network.')

        img_1, img_2 = check_equal_shape('original_images', original_images,
                                         'inversion_images', inversion_images)
        if (len(img_1.shape) != 4) or (img_1.shape[1] != 1
                                       and img_1.shape[1] != 3):
            msg = 'The shape format of img_1 and img_2 should be (img_num, channels, img_width, img_height),' \
                  ' but got {} and {}'.format(img_1.shape, img_2.shape)
            raise ValueError(msg)

        total_l2_distance = 0
        total_ssim = 0
        img_1 = img_1.transpose(0, 2, 3, 1)
        img_2 = img_2.transpose(0, 2, 3, 1)
        for i in range(img_1.shape[0]):
            _, l2_dis, _ = calculate_lp_distance(img_1[i], img_2[i])
            total_l2_distance += l2_dis
            total_ssim += compute_ssim(img_1[i], img_2[i])
        avg_l2_dis = total_l2_distance / img_1.shape[0]
        avg_ssim = total_ssim / img_1.shape[0]
        avg_confi = None
        if (new_network is not None) and (labels is not None):
            pred_logits = new_network(
                Tensor(inversion_images.astype(np.float32))).asnumpy()
            logits_softmax = softmax(pred_logits, axis=1)
            avg_confi = np.mean(logits_softmax[np.arange(img_1.shape[0]),
                                               true_labels])
        return avg_l2_dis, avg_ssim, avg_confi
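The avg_confi line above uses NumPy fancy indexing: logits_softmax[np.arange(n), true_labels] selects, for each row, the probability the new network assigns to that sample's true class. The same indexing in isolation, with made-up logits:

import numpy as np
from scipy.special import softmax

logits = np.array([[2.0, 1.0, 0.1],
                   [0.2, 3.0, 0.5]])
probs = softmax(logits, axis=1)                # row-wise probabilities
true_labels = np.array([0, 1])
per_sample = probs[np.arange(2), true_labels]  # probability of each true class
print(per_sample.mean())                       # this mean is the avg_confi value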
Example No. 8
def _compute_ssim(img_1, img_2, kernel_sigma=1.5, kernel_width=11):
    """
    Compute structural similarity.

    Args:
        img_1 (numpy.ndarray): The first image to be compared.
        img_2 (numpy.ndarray): The second image to be compared.
        kernel_sigma (float): Gaussian kernel parameter. Default: 1.5.
        kernel_width (int): Another Gaussian kernel parameter. Default: 11.

    Returns:
        float, structural similarity.
    """
    img_1, img_2 = check_equal_shape('images_1', img_1, 'images_2', img_2)

    if len(img_1.shape) > 2:
        total_ssim = 0
        for i in range(img_1.shape[2]):
            total_ssim += _compute_ssim(img_1[:, :, i], img_2[:, :, i])
        # average over however many channels were visited, not a fixed 3
        return total_ssim / img_1.shape[2]

    # Create gaussian kernel; derive the center from kernel_width
    # (a hard-coded 5 only fits the default width of 11)
    center = (kernel_width - 1) / 2
    gaussian_kernel = np.zeros((kernel_width, kernel_width))
    for i in range(kernel_width):
        for j in range(kernel_width):
            gaussian_kernel[i, j] = (1 / (2 * np.pi * (kernel_sigma**2))) * \
                np.exp(-(((i - center)**2) + ((j - center)**2)) /
                       (2 * (kernel_sigma**2)))

    img_1 = img_1.astype(np.float32)
    img_2 = img_2.astype(np.float32)

    img_sq_1 = img_1**2
    img_sq_2 = img_2**2
    img_12 = img_1 * img_2

    # Mean
    img_mu_1 = convolve(img_1, gaussian_kernel)
    img_mu_2 = convolve(img_2, gaussian_kernel)

    # Mean square
    img_mu_sq_1 = img_mu_1**2
    img_mu_sq_2 = img_mu_2**2
    img_mu_12 = img_mu_1 * img_mu_2

    # Variances
    img_sigma_sq_1 = convolve(img_sq_1, gaussian_kernel)
    img_sigma_sq_2 = convolve(img_sq_2, gaussian_kernel)

    # Covariance
    img_sigma_12 = convolve(img_12, gaussian_kernel)

    # Centered squares of variances
    img_sigma_sq_1 = img_sigma_sq_1 - img_mu_sq_1
    img_sigma_sq_2 = img_sigma_sq_2 - img_mu_sq_2
    img_sigma_12 = img_sigma_12 - img_mu_12

    k_1 = 0.01
    k_2 = 0.03
    c_1 = (k_1 * 255)**2
    c_2 = (k_2 * 255)**2

    # Calculate ssim
    num_ssim = (2 * img_mu_12 + c_1) * (2 * img_sigma_12 + c_2)
    den_ssim = (img_mu_sq_1 + img_mu_sq_2 + c_1) * (img_sigma_sq_1 +
                                                    img_sigma_sq_2 + c_2)
    res = np.average(num_ssim / den_ssim)
    return res
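For reference, the intermediate maps above assemble the standard SSIM index. With window means \mu_x, \mu_y, variances \sigma_x^2, \sigma_y^2, covariance \sigma_{xy}, and stabilizers c_1 = (0.01 L)^2, c_2 = (0.03 L)^2 for dynamic range L = 255 (the constants hard-coded above):

    \mathrm{SSIM}(x, y) = \frac{(2 \mu_x \mu_y + c_1)(2 \sigma_{xy} + c_2)}{(\mu_x^2 + \mu_y^2 + c_1)(\sigma_x^2 + \sigma_y^2 + c_2)}

The final np.average collapses this per-pixel map into the single score the function returns.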
Example No. 9
    def generate(self, inputs, labels):
        """
        Generate adversarial examples based on input data and targeted labels.

        Args:
            inputs (numpy.ndarray): Input samples.
            labels (numpy.ndarray): The ground truth label of input samples
                or target labels.

        Returns:
            numpy.ndarray, generated adversarial examples.

        Examples:
            >>> advs = attack.generate([[0.1, 0.2, 0.6], [0.3, 0, 0.4]], [1, 2])
        """

        LOGGER.debug(TAG, "enter the func generate.")
        inputs, labels = check_pair_numpy_param('inputs', inputs, 'labels',
                                                labels)
        if not self._sparse:
            labels = np.argmax(labels, axis=1)
        self._dtype = inputs.dtype
        att_original = self._to_attack_space(inputs)
        reconstructed_original, _ = self._to_model_space(att_original)

        # find an adversarial sample
        const = np.ones_like(labels, dtype=self._dtype) * self._initial_const
        lower_bound = np.zeros_like(labels, dtype=self._dtype)
        upper_bound = np.ones_like(labels, dtype=self._dtype) * np.inf
        adversarial_res = inputs.copy()
        adversarial_loss = np.ones_like(labels, dtype=self._dtype) * np.inf
        samples_num = labels.shape[0]
        adv_flag = np.zeros_like(labels)
        for binary_search_step in range(self._bin_search_steps):
            if (binary_search_step == self._bin_search_steps - 1) and \
                    (self._bin_search_steps >= 10):
                const = np.minimum(1e10, upper_bound)  # elementwise; builtin min fails on arrays
            LOGGER.debug(TAG, 'starting optimization with const = %s',
                         str(const))

            att_perturbation = np.zeros_like(att_original, dtype=self._dtype)
            loss_at_previous_check = np.ones_like(labels,
                                                  dtype=self._dtype) * np.inf

            # create a new optimizer to minimize the perturbation
            optimizer = _AdamOptimizer(att_perturbation.shape)

            for iteration in range(self._max_iterations):
                x_input, dxdp = self._to_model_space(att_original +
                                                     att_perturbation)
                logits = self._network(Tensor(x_input)).asnumpy()

                current_l2_loss, current_loss, dldx = self._loss_function(
                    logits, x_input, reconstructed_original, labels, const,
                    self._confidence)

                # check if attack success (include all examples)
                if self._targeted:
                    is_adv = (np.argmax(logits, axis=1) == labels)
                else:
                    is_adv = (np.argmax(logits, axis=1) != labels)

                for i in range(samples_num):
                    if is_adv[i]:
                        adv_flag[i] = True
                        if current_l2_loss[i] < adversarial_loss[i]:
                            adversarial_res[i] = x_input[i]
                            adversarial_loss[i] = current_l2_loss[i]

                if np.all(adv_flag):
                    if self._fast:
                        LOGGER.debug(TAG, "succeed find adversarial examples.")
                        msg = 'iteration: {}, logits_att: {}, ' \
                              'loss: {}, l2_dist: {}' \
                            .format(iteration,
                                    np.argmax(logits, axis=1),
                                    current_loss, current_l2_loss)
                        LOGGER.debug(TAG, msg)
                        return adversarial_res

                dldx, inputs = check_equal_shape('dldx', dldx, 'inputs',
                                                 inputs)

                gradient = dldx * dxdp
                att_perturbation += \
                    optimizer(gradient, self._learning_rate)

                # check if should stop iteration early
                flag = True
                iter_check = iteration % (np.ceil(
                    self._max_iterations * self._abort_early_check_ratio))
                if self._abort_early and iter_check == 0:
                    # check progress
                    for i in range(inputs.shape[0]):
                        if current_loss[i] <= .9999 * loss_at_previous_check[i]:
                            flag = False
                    # stop Adam if all samples have made no progress
                    if flag:
                        LOGGER.debug(
                            TAG, 'step:%d, no progress yet, stop iteration',
                            binary_search_step)
                        break
                    loss_at_previous_check = current_loss

            for i in range(samples_num):
                # update bound based on search result
                if adv_flag[i]:
                    LOGGER.debug(
                        TAG, 'example %d, found adversarial with const=%f', i,
                        const[i])
                    upper_bound[i] = const[i]
                else:
                    LOGGER.debug(
                        TAG, 'example %d, failed to find adversarial'
                        ' with const=%f', i, const[i])
                    lower_bound[i] = const[i]

                if upper_bound[i] == np.inf:
                    const[i] *= 10
                else:
                    const[i] = (lower_bound[i] + upper_bound[i]) / 2

        return adversarial_res
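The bound bookkeeping at the end of each binary-search step is the usual schedule of C&W-style attacks: a success tightens the upper bound, a failure raises the lower bound, and const then jumps by a factor of 10 (while no upper bound exists) or moves to the midpoint. The rule for a single example in isolation, with illustrative values:

import numpy as np

def update_const(const, lower, upper, found_adv):
    """One binary-search update of the trade-off constant for one example."""
    if found_adv:
        upper = const  # adversarial found: a smaller trade-off may also work
    else:
        lower = const  # no adversarial yet: the trade-off must grow
    const = const * 10 if upper == np.inf else (lower + upper) / 2
    return const, lower, upper

const, lower, upper = 0.01, 0.0, np.inf
for found in (False, False, True, True):
    const, lower, upper = update_const(const, lower, upper, found)
    print(const, lower, upper)  # const climbs, then bisects once a success lands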
Example No. 10
    def _loss_function(self, logits, new_x, org_x, org_or_target_class,
                       constant, confidence):
        """
        Calculate the value of loss function and gradients of loss w.r.t inputs.

        Args:
            logits (numpy.ndarray): The output of network before softmax.
            new_x (numpy.ndarray): Adversarial examples.
            org_x (numpy.ndarray): Original benign input samples.
            org_or_target_class (numpy.ndarray): Original/target labels.
            constant (float): A trade-off constant to use to balance loss
                and perturbation norm.
            confidence (float): Confidence level of the output of adversarial
                examples.

        Returns:
            - numpy.ndarray, squared l2 norm of the perturbation.

            - numpy.ndarray, sum of that norm and the weighted attack loss.

            - numpy.ndarray, gradients of the sum w.r.t inputs.

        Raises:
            ValueError: If loss is less than 0.

        Examples:
            >>> L2_loss, total_loss, dldx = self._loss_function([0.2 , 0.3,
            >>> 0.5], [0.1, 0.2, 0.2, 0.4], [0.12, 0.2, 0.25, 0.4], [1], 2, 0)
        """
        LOGGER.debug(TAG, "enter the func _loss_function.")

        logits = check_numpy_param('logits', logits)
        org_x = check_numpy_param('org_x', org_x)
        new_x, org_or_target_class = check_pair_numpy_param(
            'new_x', new_x, 'org_or_target_class', org_or_target_class)

        new_x, org_x = check_equal_shape('new_x', new_x, 'org_x', org_x)

        other_class_index = _best_logits_of_other_class(logits,
                                                        org_or_target_class,
                                                        value=np.inf)
        loss1 = np.sum((new_x - org_x)**2,
                       axis=tuple(range(len(new_x.shape))[1:]))
        loss2 = np.zeros_like(loss1, dtype=self._dtype)
        loss2_grade = np.zeros_like(new_x, dtype=self._dtype)
        jaco_grad = jacobian_matrix(self._net_grad, new_x, self._num_classes)
        if self._targeted:
            for i in range(org_or_target_class.shape[0]):
                loss2[i] = max(
                    0, logits[i][other_class_index[i]] -
                    logits[i][org_or_target_class[i]] + confidence)
                loss2_grade[i] = constant[i] * (
                    jaco_grad[other_class_index[i]][i] -
                    jaco_grad[org_or_target_class[i]][i])
        else:
            for i in range(org_or_target_class.shape[0]):
                loss2[i] = max(
                    0, logits[i][org_or_target_class[i]] -
                    logits[i][other_class_index[i]] + confidence)
                loss2_grade[i] = constant[i] * (
                    jaco_grad[org_or_target_class[i]][i] -
                    jaco_grad[other_class_index[i]][i])
        total_loss = loss1 + constant * loss2
        loss1_grade = 2 * (new_x - org_x)
        for i in range(org_or_target_class.shape[0]):
            if loss2[i] < 0:
                msg = 'loss value should be greater than or equal to 0, ' \
                      'but got loss2 {}'.format(loss2[i])
                LOGGER.error(TAG, msg)
                raise ValueError(msg)
            if loss2[i] == 0:
                loss2_grade[i, ...] = 0
        total_loss_grade = loss1_grade + loss2_grade
        return loss1, total_loss, total_loss_grade
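The two terms assembled above correspond to the Carlini-Wagner objective. For a targeted attack on class t, with logits Z and confidence margin \kappa (the confidence argument), each example contributes:

    \|x' - x\|_2^2 + c \cdot \max\bigl(0,\; \max_{i \neq t} Z(x')_i - Z(x')_t + \kappa\bigr)

The untargeted branch swaps the two logit terms, and the gradient is assembled by hand from the Jacobian rows of the two classes involved, with the hinge's contribution zeroed wherever loss2 is exactly 0.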