Exemplo n.º 1
0
    def _run_optimized(self, dataset, logger):
        ''' The `run` method for 'cw' and 'deepfool'.

        Runs the attack batch by batch and measures how far every successful adversarial example is from its
        original input.

        :param dataset: A dataset whose ``batch()`` result yields ``(id, xs, ys, ys_target)`` tuples when iterated
            through ``dataset_to_iterator``.
        :param logger: A logger used for progress messages and handed to the attack. Progress logging is skipped
            when it is falsy.
        :return: A 1-D numpy array with one entry per example: the distance between the adversarial example and the
            original one (maximum absolute difference when ``self.distance_metric == 'l_inf'``, Euclidean norm
            otherwise), or ``nan`` when the attack failed on that example.
        '''
        # the attack is already configured in `config()`
        self.attack.config(logger=logger)

        rs = []

        iterator = dataset_to_iterator(dataset.batch(self.batch_size),
                                       self._session)
        for i_batch, (_, xs, ys, ys_target) in enumerate(iterator):
            if logger:
                # Offset by the nominal batch size: the final batch may be shorter, so `i_batch * len(xs)` would
                # report a wrong starting index for it.
                begin = i_batch * self.batch_size
                logger.info('n={}..{}'.format(begin, begin + len(xs) - 1))
            xs_adv = self.attack.batch_attack(xs, ys, ys_target)
            # `details['success']` is read after the attack ran, one flag per example of this batch.
            for x, x_adv, success in zip(xs, xs_adv,
                                         self.attack.details['success']):
                if not success:
                    rs.append(np.nan)
                elif self.distance_metric == 'l_inf':
                    rs.append(np.max(np.abs(x_adv - x)))
                else:
                    rs.append(np.sqrt(np.sum((x_adv - x)**2)))

        return np.array(rs)
Exemplo n.º 2
0
    def _run_cw(self, dataset, logger):
        ''' The `run` method for 'cw'.

        Re-runs the attack on every batch with an increasing iteration budget and records the model's labels and
        the distances at each budget.

        :param dataset: A dataset whose ``batch()`` result yields ``(id, xs, ys, ys_target)`` tuples when iterated
            through ``dataset_to_iterator``.
        :param logger: A logger for progress messages. Logging is skipped when it is falsy.
        :return: A dictionary mapping each iteration budget to a ``(labels, dists)`` tuple of numpy arrays
            concatenated over all batches.
        '''
        # the attack is already configured in `config()`
        # Evenly spaced iteration budgets up to `self.iteration`. The `int()` truncation can produce the same
        # budget more than once when `self.iteration` is small relative to `self.cw_n_points`; duplicates are
        # dropped so that no budget is attacked and recorded twice for the same batch.
        iterations = []
        for i in range(1, self.cw_n_points + 1):
            iteration = int(self.iteration * i / self.cw_n_points)
            if iteration not in iterations:
                iterations.append(iteration)
        rs = {step: ([], []) for step in iterations}

        iterator = dataset_to_iterator(dataset.batch(self.batch_size),
                                       self._session)
        for i_batch, (_, xs, ys, ys_target) in enumerate(iterator):
            # Offset by the nominal batch size so the logged range stays right for a shorter final batch.
            begin = i_batch * self.batch_size
            for iteration in iterations:
                self.attack.config(iteration=iteration)
                xs_adv = self.attack.batch_attack(xs, ys, ys_target)
                labels, dists = self._session.run(self._cw_data,
                                                  feed_dict={
                                                      self._xs_ph: xs,
                                                      self._xs_adv_ph: xs_adv
                                                  })
                rs[iteration][0].append(labels)
                rs[iteration][1].append(dists)
                if logger:
                    logger.info('n={}..{}: iteration={}'.format(
                        begin, begin + len(xs) - 1, iteration))

        # merge the per-batch arrays of every budget into one array
        return {
            step: (np.concatenate(labels), np.concatenate(dists))
            for step, (labels, dists) in rs.items()
        }
Exemplo n.º 3
0
    def _run_deepfool(self, dataset, logger):
        ''' The `run` method for 'deepfool'.

        Consumes the per-step results produced by ``self.attack.batch_attack`` for every batch. When the attack
        stops before ``self.attack.iteration`` steps, the remaining steps are filled with the last step's data so
        that every step ends up with an entry for every batch.

        :param dataset: A dataset whose ``batch()`` result yields ``(id, xs, ys, ys_target)`` tuples when iterated
            through ``dataset_to_iterator``.
        :param logger: A logger for progress messages. Logging is skipped when it is falsy.
        :return: A dictionary mapping each step (``1..self.attack.iteration``) to a ``(labels, dists)`` tuple of
            numpy arrays concatenated over all batches.
        '''
        # the attack is already configured in `config()`
        n_steps = self.attack.iteration
        rs = {step: ([], []) for step in range(1, n_steps + 1)}

        iterator = dataset_to_iterator(dataset.batch(self.batch_size),
                                       self._session)
        for i_batch, (_, xs, ys, ys_target) in enumerate(iterator):
            # Offset by the nominal batch size so the logged range stays right for a shorter final batch.
            begin = i_batch * self.batch_size
            last_step = 0
            for step, (labels, dists) in enumerate(
                    self.attack.batch_attack(xs, ys, ys_target), start=1):
                rs[step][0].append(labels)
                rs[step][1].append(dists)
                last_step = step
                if logger:
                    logger.info('n={}..{}: iteration={}'.format(
                        begin, begin + len(xs) - 1, step))

            if last_step < n_steps:
                # DeepFool would early stop. Padding the remaining steps with the last step's data.
                # If the attack produced no step at all there is nothing to pad with and the lookup below
                # raises KeyError.
                labels, dists = rs[last_step][0][-1], rs[last_step][1][-1]
                for remain_step in range(last_step + 1, n_steps + 1):
                    rs[remain_step][0].append(labels)
                    rs[remain_step][1].append(dists)

        # merge the per-batch arrays of every step into one array
        return {
            step: (np.concatenate(labels), np.concatenate(dists))
            for step, (labels, dists) in rs.items()
        }
Exemplo n.º 4
0
    def _run_binsearch_nes_family(self, dataset, logger):
        ''' The ``run`` method for nes, spsa & nattack.

        For every example, the magnitude is first doubled until the attack succeeds (at most ``self.search_steps``
        tries), then narrowed with ``self.binsearch_steps`` rounds of binary search.

        :param dataset: A dataset yielding ``(id, x, y, y_target)`` tuples when iterated through
            ``dataset_to_iterator``.
        :param logger: A logger handed to the attack and used for progress messages. Progress logging is skipped
            when it is falsy.
        :return: A 1-D numpy array with one entry per example: the distance of the last successful adversarial
            example to the original (per ``self.distance_metric``), or ``nan`` if no attempt succeeded.
        '''
        # the attack is already configured in `config()`
        self.attack.config(logger=logger)

        def attack_with(magnitude, x, y, y_target):
            ''' Reconfigure the attack for `magnitude`, run it once, return `(x_adv, success)`. '''
            name = self.attack_name
            if name == 'nes':
                self.attack.config(magnitude=magnitude,
                                   lr=magnitude * self.nes_lr_factor,
                                   min_lr=magnitude * self.nes_min_lr_factor)
            elif name == 'spsa':
                self.attack.config(magnitude=magnitude,
                                   lr=magnitude * self.spsa_lr_factor)
            else:  # name == 'nattack':
                self.attack.config(magnitude=magnitude)
            adversarial = self.attack.attack(x, y, y_target)
            return adversarial, self.attack.details['success']

        distances = []

        for n, (_, x, y, y_target) in enumerate(
                dataset_to_iterator(dataset, self._session)):
            any_success = False
            best = None
            lower, upper = 0.0, self.init_distortion

            # exponential search: double the magnitude until the attack succeeds
            for i in range(self.search_steps):
                x_adv, succ = attack_with(upper, x, y, y_target)
                if logger:
                    logger.info('search n={}: i={}, success={}'.format(n, i, succ))
                if succ:
                    any_success, best = True, x_adv
                    break
                lower = upper
                upper *= 2.0

            # binary search between the last failing and the last succeeding magnitude
            for i in range(self.binsearch_steps):
                middle = (lower + upper) / 2
                x_adv, succ = attack_with(middle, x, y, y_target)
                if succ:
                    upper = middle
                    any_success, best = True, x_adv
                else:
                    lower = middle
                if logger:
                    logger.info('binsearch n={}: i={}, success={}'.format(n, i, succ))

            if not any_success:  # all attacks failed
                distances.append(np.nan)
            elif self.distance_metric == 'l_inf':
                distances.append(np.max(np.abs(best - x)))
            else:
                distances.append(np.sqrt(np.sum((best - x)**2)))

        return np.array(distances)
Exemplo n.º 5
0
    def _run_basic(self, dataset, logger):
        ''' The `run` method for 'bim', 'pgd', 'mim'.

        Consumes the per-step ``(labels, dists)`` results produced by ``self.attack.batch_attack`` for every batch
        and groups them by step.

        :param dataset: A dataset whose ``batch()`` result yields ``(id, xs, ys, ys_target)`` tuples when iterated
            through ``dataset_to_iterator``.
        :param logger: A logger for progress messages. Logging is skipped when it is falsy.
        :return: A dictionary mapping each step (starting at 1) to a ``(labels, dists)`` tuple of numpy arrays
            concatenated over all batches.
        '''
        # the attack is already configured in `config()`
        rs = dict()

        iterator = dataset_to_iterator(dataset.batch(self.batch_size),
                                       self._session)
        for i_batch, (_, xs, ys, ys_target) in enumerate(iterator):
            # Offset by the nominal batch size so the logged range stays right for a shorter final batch.
            begin = i_batch * self.batch_size
            # The attack yields one result per step; iterating it directly ends cleanly when it is exhausted.
            for step, (labels, dists) in enumerate(
                    self.attack.batch_attack(xs, ys, ys_target), start=1):
                step_labels, step_dists = rs.setdefault(step, ([], []))
                step_labels.append(labels)
                step_dists.append(dists)
                if logger:
                    logger.info('n={}..{}: iteration={}'.format(
                        begin, begin + len(xs) - 1, step))

        # merge the per-batch arrays of every step into one array
        return {
            step: (np.concatenate(labels), np.concatenate(dists))
            for step, (labels, dists) in rs.items()
        }
Exemplo n.º 6
0
def gen_starting_points(model,
                        ys,
                        ys_target,
                        goal,
                        dataset_name,
                        session,
                        pred_fn,
                        cache=None):
    ''' Build one already-adversarial starting point for every label, according to the adversarial goal.

    For the ``'ut'`` and ``'tm'`` goals a starting point is uniform random noise which ``pred_fn`` does not
    classify as the true label. For any other goal it is the first example of the target label's dataset which
    ``pred_fn`` classifies as the target label.

    :param model: The model. Its ``x_shape``, ``x_dtype``, ``x_min`` and ``x_max`` attributes are used.
    :param ys: True labels.
    :param ys_target: Targeted labels.
    :param goal: Adversarial goal.
    :param dataset_name: The dataset's name. ``'cifar10'`` selects CIFAR-10, any other value selects ImageNet.
    :param session: ``tf.Session`` for loading dataset.
    :param pred_fn: A function which accepts a batch of model inputs as a numpy array and returns the model's
        predictions.
    :param cache: A dictionary from label to starting point for reusing generated starting points. It is updated in
        place. The same cache shall not be shared between different models or adversarial goals.
    :return: Starting points as a numpy array of shape ``(len(ys), *model.x_shape)``.
    '''
    cache = dict() if cache is None else cache

    np_dtype = model.x_dtype.as_numpy_dtype
    x_shape = tuple(model.x_shape)
    starting_points = np.zeros((len(ys),) + x_shape, dtype=np_dtype)

    untargeted = goal in ('ut', 'tm')
    if untargeted:
        for index, y in enumerate(ys):
            y = int(y)
            # keep drawing noise until the prediction differs from the true label
            while y not in cache:
                noise = np.random.uniform(low=model.x_min,
                                          high=model.x_max,
                                          size=(1,) + x_shape)
                noise = noise.astype(np_dtype)
                if pred_fn(noise)[0] != y:
                    cache[y] = noise[0]
            starting_points[index] = cache[y]
        return starting_points

    for index, y in enumerate(ys_target):
        if y not in cache:
            loader = cifar10 if dataset_name == 'cifar10' else imagenet
            dataset = loader.load_dataset_for_classifier(
                model, target_label=y).batch(1)
            # take the first example that the model really classifies as the target label
            for _, x, _ in dataset_to_iterator(dataset, session):
                if pred_fn(x)[0] == y:
                    cache[y] = x[0]
                    break
        starting_points[index] = cache[y]

    return starting_points
Exemplo n.º 7
0
    def _run_decision_based(self, dataset, logger):
        ''' The `run` method for 'boundary', 'evolutionary'.

        For every batch, starting points are generated with ``gen_starting_points`` and handed to the attack, then
        the per-step ``(labels, dists)`` results produced by ``self.attack.batch_attack`` are grouped by step.

        :param dataset: A dataset whose ``batch()`` result yields ``(id, xs, ys, ys_target)`` tuples when iterated
            through ``dataset_to_iterator``.
        :param logger: A logger for progress messages. Logging is skipped when it is falsy.
        :return: A dictionary mapping each step (starting at 1) to a ``(labels, dists)`` tuple of numpy arrays
            concatenated over all batches.
        '''
        # the attack is already configured in `config()`
        iterator = dataset_to_iterator(dataset.batch(self.batch_size),
                                       self._session)

        def pred_fn(xs):
            ''' Predict labels for a batch of inputs with the benchmarked model. '''
            return self._session.run(self._xs_label,
                                     feed_dict={self._xs_ph: xs})

        # starting points are shared between batches through this cache
        cache = dict()

        rs = dict()
        for i_batch, (_, xs, ys, ys_target) in enumerate(iterator):
            # Offset by the nominal batch size so the logged range stays right for a shorter final batch.
            begin = i_batch * self.batch_size
            starting_points = gen_starting_points(self.model, ys, ys_target,
                                                  self.goal, self.dataset_name,
                                                  self._session, pred_fn,
                                                  cache)
            self.attack.config(starting_points=starting_points)

            # The attack yields one result per step; iterating it directly ends cleanly when it is exhausted.
            for step, (labels, dists) in enumerate(
                    self.attack.batch_attack(xs, ys, ys_target), start=1):
                step_labels, step_dists = rs.setdefault(step, ([], []))
                step_labels.append(labels)
                step_dists.append(dists)
                if logger:
                    logger.info('n={}..{}: iteration={}'.format(
                        begin, begin + len(xs) - 1, step))

        # merge the per-batch arrays of every step into one array
        return {
            step: (np.concatenate(labels), np.concatenate(dists))
            for step, (labels, dists) in rs.items()
        }
Exemplo n.º 8
0
    def _run_score_based(self, dataset, logger):
        ''' The `run` method for 'nes', 'spsa', 'nattack'.

        Attacks the examples one at a time and records, for each of them, the label and distance evaluated through
        ``self._score_based_data`` together with the ``'queries'`` entry of the attack's details.

        :param dataset: A dataset yielding ``(id, x, y, y_target)`` tuples when iterated through
            ``dataset_to_iterator``.
        :param logger: A logger for progress messages. Logging is skipped when it is falsy.
        :return: A ``(labels, dists, queries)`` tuple of numpy arrays with one entry per example.
        '''
        # the attack is already configured in `config()`
        all_labels, all_dists, all_queries = [], [], []

        examples = dataset_to_iterator(dataset, self._session)
        for index, (_, x, y, y_target) in enumerate(examples):
            x_adv = self.attack.attack(x, y, y_target)
            feed = {self._x_ph: x, self._x_adv_ph: x_adv}
            batch_labels, batch_dists = self._session.run(
                self._score_based_data, feed_dict=feed)
            # only the first element of each fetched result is kept
            all_labels.append(batch_labels[0])
            all_dists.append(batch_dists[0])
            all_queries.append(self.attack.details['queries'])
            if logger:
                logger.info('n={}, {}'.format(index, self.attack.details))

        return np.array(all_labels), np.array(all_dists), np.array(all_queries)
Exemplo n.º 9
0
    def _run_binsearch(self, dataset, logger):
        ''' The `run` method for 'fgsm'.

        For every batch, a linear scan over magnitudes finds a magnitude at which the attack succeeds, then
        ``self.binsearch_steps`` rounds of binary search narrow it down per example.

        The index arithmetic below is built from ``self.batch_size``, so every batch is expected to hold exactly
        ``self.batch_size`` examples.

        :param dataset: A dataset whose ``batch()`` result yields ``(id, xs, ys, ys_target)`` tuples when iterated
            through ``dataset_to_iterator``.
        :param logger: A logger for progress messages. Logging is skipped when it is falsy.
        :return: A 1-D numpy array with one entry per example: the distance of the last recorded adversarial
            example to the original (per ``self.distance_metric``), or ``nan`` if no attempt succeeded.
        '''

        def attack_and_check(magnitude, xs, ys, ys_target, ys_flatten,
                             ys_target_flatten):
            ''' Run the attack with `magnitude`, return `(xs_adv, succ)` where `succ` is a boolean mask. '''
            # config the attack
            self.attack.config(magnitude=magnitude)
            # run the attack
            xs_adv = self.attack.batch_attack(xs, ys, ys_target)
            logits = self._session.run(self._logits,
                                       feed_dict={self._xs_ph: xs_adv})
            # check if attack succeed considering the confidence value
            if self.goal == 'ut' or self.goal == 'tm':
                # for ut and tm goal, if the original label's logit is not the largest one, the example is
                # adversarial.
                succ = logits.max(
                    axis=1) - logits.take(ys_flatten) > self.confidence
            else:
                # for t goal, if the target label's logit is the largest one, the example is adversarial.
                logits_this = logits.take(ys_target_flatten)
                # `flatten()` returns a copy, so masking the target logits does not touch the fetched array
                logits = logits.flatten()
                logits[ys_target_flatten] = np.nan
                logits_that = np.nanmax(logits.reshape(
                    (self.batch_size, -1)),
                                        axis=1)
                succ = logits_this - logits_that > self.confidence
            return xs_adv, succ

        # the attack is already configured in `config()`
        rs = []

        iterator = dataset_to_iterator(dataset.batch(self.batch_size),
                                       self._session)
        for i_batch, (_, xs, ys, ys_target) in enumerate(iterator):
            begin = i_batch * self.batch_size

            # create numpy index for fetching the original and target label's logit value
            ys_range = np.arange(0, self.batch_size * self.model.n_class,
                                 self.model.n_class)
            ys_flatten = ys_range.astype(
                self.model.y_dtype.as_numpy_dtype) + ys
            ys_target_flatten = ys_range.astype(
                self.model.y_dtype.as_numpy_dtype) + ys_target
            del ys_range

            lo = np.zeros(self.batch_size, dtype=np.float32)
            hi = lo + self.init_distortion

            # set xs_result to zeros initially, so that if the attack fails all the way down we could know it.
            xs_result = np.zeros_like(xs)

            # use linear search to find an adversarial magnitude since fgsm do not play well with exponential search
            # The attack is run with magnitudes in descending order:
            # [ init_distortion * (2**search_steps), ..., init_distortion * 2, init_distortion * 1 ].
            # The 2**search_steps here intends to achieve similar semantics to exponential search.
            # NOTE(review): the scan stops as soon as every example succeeds, which can be at the very first
            # (largest) magnitude; confirm that this early exit is intended.
            for i in range(2**self.search_steps):
                magnitude = self.init_distortion * (2**self.search_steps - i)
                xs_adv, succ = attack_and_check(magnitude, xs, ys, ys_target,
                                                ys_flatten, ys_target_flatten)
                # update the adversarial examples
                xs_result[succ] = xs_adv[succ]
                # update the smallest adversarial magnitude
                hi[succ] = magnitude
                if logger:
                    logger.info(
                        'linsearch n={}..{}: i={}, success_rate={:.3f}'.format(
                            begin, begin + len(xs) - 1, i,
                            succ.astype(np.float64).mean()))
                if np.all(succ):
                    break

            lo = hi - self.init_distortion

            # run binsearch to find the minimal adversarial magnitude
            for i in range(self.binsearch_steps):
                mi = (lo + hi) / 2
                xs_adv, succ = attack_and_check(mi, xs, ys, ys_target,
                                                ys_flatten, ys_target_flatten)
                # update the adversarial examples
                xs_result[succ] = xs_adv[succ]
                # update hi (if succeed) or lo (if not)
                not_succ = np.logical_not(succ)
                hi[succ] = mi[succ]
                lo[not_succ] = mi[not_succ]
                if logger:
                    logger.info(
                        'binsearch n={}..{}: i={}, success_rate={:.3f}'.format(
                            begin, begin + len(xs) - 1, i,
                            succ.astype(np.float64).mean()))

            for x, x_result in zip(xs, xs_result):
                if np.all(x_result == 0):  # all attacks failed
                    rs.append(np.nan)
                elif self.distance_metric == 'l_inf':
                    rs.append(np.max(np.abs(x_result - x)))
                else:
                    rs.append(np.sqrt(np.sum((x_result - x)**2)))

        return np.array(rs)
Exemplo n.º 10
0
from realsafe.model.ensemble import EnsembleModel, EnsembleRandomnessModel

# Sanity check: the ensemble wrappers built from one model must predict the same labels as the model itself.
batch_size = 100

# TensorFlow session; `allow_growth` asks TensorFlow to allocate GPU memory on demand.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.Session(config=config)

# load the model definition that lives next to this script
model_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                          '../example/cifar10/resnet56.py')
model = load_model_from_path(model_path).load(session)
# ensemble of the same model twice; the second argument is presumably the per-model weights -- TODO confirm
e_model = EnsembleModel([model, model], [0.5, 0.5])
# NOTE(review): the meaning of `10` is defined by EnsembleRandomnessModel, which is not visible here
er_model = EnsembleRandomnessModel(model, 10, session)

# fetch a single batch of examples
ds = cifar10.load_dataset_for_classifier(model).batch(batch_size).take(1)
_, xs, ys = next(dataset_to_iterator(ds, session))

xs_ph = tf.placeholder(model.x_dtype, shape=(batch_size, *model.x_shape))

# label predictions of the plain model and of both ensemble wrappers on the same placeholder
labels = model.labels(xs_ph)
e_labels = e_model.labels(xs_ph)
er_labels = er_model.labels(xs_ph)

labels_np = session.run(labels, feed_dict={xs_ph: xs})
e_labels_np = session.run(e_labels, feed_dict={xs_ph: xs})
er_labels_np = session.run(er_labels, feed_dict={xs_ph: xs})

# both ensembles must agree exactly with the plain model
assert (np.array_equal(labels_np, e_labels_np))
assert (np.array_equal(labels_np, er_labels_np))

print(labels_np)
Exemplo n.º 11
0
    '../example/cifar10/adp.py',
]

# Measure the accuracy of every model listed in MODELS.
# `MODELS`, `session` and `batch_size` are defined earlier in the script.
rs = dict()  # model path (as listed in MODELS) -> mean accuracy
for model_path_short in MODELS:
    print('Loading {}...'.format(model_path_short))
    # model paths are resolved relative to this script's directory
    model_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                              model_path_short)
    model = load_model_from_path(model_path).load(session)
    dataset = cifar10.load_dataset_for_classifier(model,
                                                  offset=0,
                                                  load_target=True)
    # a new placeholder and label op are created for every model
    xs_ph = tf.placeholder(model.x_dtype, shape=(None, *model.x_shape))
    labels = model.labels(xs_ph)

    accs = []
    # the whole dataset is evaluated 10 times and the per-batch accuracies are averaged
    # NOTE(review): presumably repeated because some models are randomized -- confirm
    for _ in range(10):
        for i_batch, (_, xs, ys, ys_target) in enumerate(
                dataset_to_iterator(dataset.batch(batch_size), session)):
            predictions = session.run(labels, feed_dict={xs_ph: xs})
            acc = np.equal(predictions, ys).astype(np.float32).mean()
            accs.append(acc)
            print('n={}..{} acc={:3f}'.format(
                i_batch * batch_size, i_batch * batch_size + batch_size - 1,
                acc))
    # unweighted mean over batches
    rs[model_path_short] = np.mean(accs)
    print('{} acc={:f}'.format(model_path, rs[model_path_short]))

# summary of all models
for k, v in rs.items():
    print('{} acc={:f}'.format(k, v))
Exemplo n.º 12
0
    def run(self, dataset, logger):
        ''' Run the attack on the dataset.

        :param dataset: A ``tf.data.Dataset`` instance, whose first element is the unique identifier for the data point,
            second element is the image, third element is the ground truth label. If the goal is 'tm' or 't', a fourth
            element should be provided as the target label for the attack.
        :param logger: A standard logger, or ``None`` to disable logging.
        :return: A tuple of five numpy array. The first element represents whether the model predicting correctly on
            each dataset point. The second element represents whether the model predicting correctly on the adversarial
            example for each dataset point. The third element represents whether the dataset point is non-adversarial
            according to the goal. The fourth element represents whether the attack succeed. The fifth element
            is the generated adversarial example's distance to the dataset's original example.
        '''
        acc, acc_adv, total, succ, dist = [], [], [], [], []

        def update(accs, accs_adv, totals, succs, dists):
            ''' Store one batch of results and log its summary statistics. '''
            acc.append(accs)
            acc_adv.append(accs_adv)
            total.append(totals)
            succ.append(succs)
            dist.append(dists)
            if logger is not None:
                # `np.float` was removed from NumPy; `np.float64` is the type it used to alias.
                logger.info(
                    'acc={:3f}, adv_acc={:3f}, succ={:3f}, dist_mean={:3f}'.
                    format(
                        np.mean(accs.astype(np.float64)),
                        np.mean(accs_adv.astype(np.float64)),
                        np.sum(succs.astype(np.float64)) /
                        np.sum(totals.astype(np.float64)), np.mean(dists)))

        def evaluate(xs, xs_adv, ys, ts):
            ''' Predict on the original and the adversarial batch, then record the batch's results. '''
            xs_pred = self.session.run(self.xs_label,
                                       feed_dict={self.xs_ph: xs})
            xs_adv_pred = self.session.run(self.xs_label,
                                           feed_dict={self.xs_ph: xs_adv})
            update(*self._batch_info(xs, xs_adv, ys, ts, xs_pred,
                                     xs_adv_pred))

        if self.attack_name in ('fgsm', 'bim', 'pgd', 'mim', 'cw', 'deepfool'):
            # batched attacks
            iterator = dataset_to_iterator(dataset.batch(self.batch_size),
                                           self.session)
            for _, xs, ys, ts in iterator:
                xs_adv = self.attack.batch_attack(xs, ys, ts)
                evaluate(xs, xs_adv, ys, ts)

        elif self.attack_name in ('boundary', 'evolutionary'):
            # batched attacks which need already-adversarial starting points
            cache = dict()
            iterator = dataset_to_iterator(dataset.batch(self.batch_size),
                                           self.session)

            def pred_fn(xs):
                return self.session.run(self.xs_label,
                                        feed_dict={self.xs_ph: xs})

            for _, xs, ys, ts in iterator:
                starting_points = gen_starting_points(self.model, ys, ts,
                                                      self.goal,
                                                      self.dataset_name,
                                                      self.session, pred_fn,
                                                      cache)
                self.config(starting_points=starting_points)
                xs_adv = self.attack.batch_attack(xs, ys, ts)
                evaluate(xs, xs_adv, ys, ts)

        elif self.attack_name in ('nes', 'spsa', 'nattack'):
            # these attacks handle one example at a time; each example is wrapped into a batch of one
            iterator = dataset_to_iterator(dataset, self.session)
            for _, x, y, t in iterator:
                x_adv = self.attack.attack(x, y, t)
                x_pred = self.session.run(self.xs_label,
                                          feed_dict={self.xs_ph: [x]})[0]
                x_adv_pred = self.session.run(self.xs_label,
                                              feed_dict={self.xs_ph:
                                                         [x_adv]})[0]
                xs, xs_adv, ys, ts = np.array([x]), np.array(
                    [x_adv]), np.array([y]), np.array([t])
                xs_pred, xs_adv_pred = np.array([x_pred
                                                 ]), np.array([x_adv_pred])
                update(*self._batch_info(xs, xs_adv, ys, ts, xs_pred,
                                         xs_adv_pred))

        return tuple(map(np.concatenate, (acc, acc_adv, total, succ, dist)))
Exemplo n.º 13
0
# Evaluate clean and adversarial accuracy of `model` under an untargeted l_inf BIM attack.
# `model`, `session` and `batch_size` are defined earlier in the script.
xs_ph = tf.placeholder(model.x_dtype, shape=(batch_size, *model.x_shape))
lgs, lbs = model.logits_and_labels(xs_ph)

# only the first 10 batches are evaluated
dataset = imagenet.load_dataset_for_classifier(model, load_target=True)
dataset = dataset.batch(batch_size).take(10)

loss = CrossEntropyLoss(model)
attack = BIM(model=model,
             batch_size=batch_size,
             loss=loss,
             goal='ut',
             distance_metric='l_inf',
             session=session)
attack.config(
    iteration=50,
    magnitude=8.0 / 255.0,
    alpha=0.5 / 255.0,
)

accs, adv_accs = [], []
for filenames, xs, ys, ys_target in dataset_to_iterator(dataset, session):
    xs_adv = attack.batch_attack(xs, ys=ys)

    lbs_pred = session.run(lbs, feed_dict={xs_ph: xs})
    lbs_adv = session.run(lbs, feed_dict={xs_ph: xs_adv})

    # `np.float` was removed from NumPy; `np.float64` is the type it used to alias.
    accs.append(np.equal(ys, lbs_pred).astype(np.float64).mean())
    adv_accs.append(np.equal(ys, lbs_adv).astype(np.float64).mean())
    # per-batch clean accuracy and adversarial accuracy
    print(accs[-1], adv_accs[-1])

# mean clean accuracy and mean adversarial accuracy over all batches
print(np.mean(accs), np.mean(adv_accs))