示例#1
0
def extract_anglediffs_epoch(filedata):
    # data holders
    batch_counter = 0
    prev_angdiff = 0.

    # result-holder...
    epoch_anglediffs = []

    # iterate over the csv files
    for bidx, datanum in tqdm( \
        enumerate(sorted(filedata.keys())), desc='  [a-extract]', total=len(filedata.keys())):
        cur_filedata = filedata[datanum]

        # : [clean] load the correct data
        cur_cdata = np.array([])
        if 'clean' in cur_filedata:

            # :: load the entire data
            cur_cdata = []
            for each_filedata in cur_filedata['clean']:
                each_cdata = io.load_from_csv(each_filedata)
                each_cdata = [
                    list(map(float, each_cline)) for each_cline in each_cdata
                ]
                each_cdata = np.array(each_cdata)
                cur_cdata.append(each_cdata)
            cur_cdata = sum(cur_cdata) / len(cur_cdata)

        # : [poison] load the poison data
        cur_pdata = np.array([])
        if 'poison' in cur_filedata:

            # :: load the entire data
            cur_pdata = []
            for each_filedata in cur_filedata['poison']:
                each_pdata = io.load_from_csv(each_filedata)
                each_pdata = [
                    list(map(float, each_pline)) for each_pline in each_pdata
                ]
                each_pdata = np.array(each_pdata)
                cur_pdata.append(each_pdata)
            cur_pdata = sum(cur_pdata) / len(cur_pdata)

        # : compute the angle between them
        if (cur_cdata.size != 0) \
            and (cur_pdata.size != 0):
            cur_angdiff = _compute_cosine_similarly(cur_cdata, cur_pdata)
            prev_angdiff = cur_angdiff

        # : store them to the data-holder
        epoch_anglediffs.append([cur_angdiff])

    # end for bidx....
    astore_csvfile = '{}_epoch.csv'.format(astore_file)
    io.store_to_csv(astore_csvfile, epoch_anglediffs)
    print(' : Angle differences are stored to [{}]'.format(astore_csvfile))
示例#2
0
def extract_magnitudes_epoch(filedata):
    # data holders
    batch_counter = 0
    prev_mclean = 0.
    prev_mpoison = None

    # result-holder...
    epoch_magnitudes = []

    # iterate over the csv files
    for bidx, datanum in tqdm( \
        enumerate(sorted(filedata.keys())), desc='  [m-extract]', total=len(filedata.keys())):
        cur_filedata = filedata[datanum]

        # : [clean] load the correct data
        cur_mclean = prev_mclean
        if 'clean' in cur_filedata:

            # :: load the entire data
            cur_cdata = []
            for each_filedata in cur_filedata['clean']:
                each_cdata = io.load_from_csv(each_filedata)
                each_cdata = [
                    list(map(float, each_cline)) for each_cline in each_cdata
                ]
                each_cdata = np.array(each_cdata)
                cur_cdata.append(each_cdata)
            cur_cdata = sum(cur_cdata) / len(cur_cdata)
            cur_mclean = np.linalg.norm(cur_cdata, 2)
            prev_mclean = cur_mclean

        # : [poison] load the poison data
        cur_mpoison = prev_mpoison
        if 'poison' in cur_filedata:

            # :: load the entire data
            cur_pdata = []
            for each_filedata in cur_filedata['poison']:
                each_pdata = io.load_from_csv(each_filedata)
                each_pdata = [
                    list(map(float, each_pline)) for each_pline in each_pdata
                ]
                each_pdata = np.array(each_pdata)
                cur_pdata.append(each_pdata)
            cur_pdata = sum(cur_pdata) / len(cur_pdata)
            cur_mpoison = np.linalg.norm(cur_pdata, 2)
            prev_mpoison = cur_mpoison

        # : store them to the data-holder
        epoch_magnitudes.append([cur_mclean, cur_mpoison])

    # end for bidx....
    mstore_csvfile = '{}_epoch.csv'.format(mstore_file)
    io.store_to_csv(mstore_csvfile, epoch_magnitudes)
    print(' : Magnitudes are stored to [{}]'.format(mstore_csvfile))
示例#3
0
            # :: record the best accuracy
            if best_acc < current_acc:
                best_at = epoch
                best_acc = current_acc

            # :: report the current state (cannot compute the total eps, as we split the ....)
            print (' : Epoch {} - acc {:.4f} (base) / {:.4f} (curr) / {:.4f} (best @ {})'.format( \
                epoch, baseline_acc, current_acc, best_acc, best_at))

            # :: flush the stdouts
            sys.stdout.flush()

            # :: info
            print(' : Poison {}, Clean {}'.format(total_pupdates,
                                                  total_cupdates))

    # end for epoch...

    # report the attack results...
    print (' : [Result] epoch {}, poison {}, base {:.4f}, best {:.4f} @ {}'.format( \
        epoch, x_poison.shape[0], baseline_acc, best_acc, best_at))

    # store the attack results
    attack_results = [[best_at, best_acc, baseline_acc, x_poison.shape[0]]]
    io.store_to_csv(results_data, attack_results)

    # finally
    print(' : Done, don\'t store the model')
    # done.
def do_tpoisoning(arguments):

    # --------------------------------------------------------------------------
    #   Passed arguments
    # --------------------------------------------------------------------------
    task_num = arguments[0]
    task_queue = arguments[1]
    args = arguments[2]

    # ------------------------------------------------------------
    #  Tensorflow configurations (load TF here, main causes an error)
    # ------------------------------------------------------------
    import tensorflow as tf
    from tensorflow.python.client import device_lib
    from tensorflow.compat.v1.logging import set_verbosity, ERROR
    from tensorflow.compat.v1.estimator.inputs import numpy_input_fn

    # these will load the tensorflow module, so load it here
    from utils import datasets, models

    # control tensorflow info. level
    # ------------------------------------------------------------
    #  Level | Level for Humans | Level Description
    # -------|------------------|---------------------------------
    #  0     | DEBUG            | [Default] Print all messages
    #  1     | INFO             | Filter out INFO messages
    #  2     | WARNING          | Filter out INFO & WARNING messages
    #  3     | ERROR            | Filter out all messages
    set_verbosity(ERROR)

    # ------------------------------------------------------------
    #  Run control... (for the error cases)
    # ------------------------------------------------------------
    skip_data = True if args.fromtidx else False
    skip_poison = True if (args.frompidx >= 0) else False
    print(
        ' : [Task: {}] skip conditions, from: [{}th target] w. [{}th poison]'.
        format(task_num, skip_data, skip_poison))

    # --------------------------------------------------------------------------
    #   Use the sampled dataset, not the entire one
    # --------------------------------------------------------------------------
    if os.path.exists(args.samples):
        # : load the indexes from the csv file (that contains the list of ints)
        sample_indexes = io.load_from_csv(args.samples)[0]
        sample_indexes = list(map(int, sample_indexes))
        print(' : [Task: {}] consider [{}] target sampled from the entirety'.
              format(task_num, len(sample_indexes)))
    else:
        sample_indexes = []
        print(' : [Task: {}] do not sample the targets, consider all.'.format(
            task_num))

    # ------------------------------------------------------------
    #  Do poisoning attacks for each case
    # ------------------------------------------------------------
    for each_data in task_queue:
        """
            Set store locations
        """
        # extract the store location (ex. vanilla_conv.../10.0_2_2000....)
        store_dir = args.poisond.split('/')[4:]
        store_dir = '/'.join(store_dir)

        # the target index
        poison_toks = each_data.split('/')
        poison_tkey = poison_toks[-1].replace('.pkl', '')
        poison_tkey = poison_tkey.split('_')[-1]

        # when we use sampling, check if the indexes are in our interest
        if (sample_indexes) \
            and (int(poison_tkey) not in sample_indexes):
            print(
                ' : [Task: {}][Target: {}] is not in our samples, skip'.format(
                    task_num, poison_tkey))
            continue

        # result dir and the file to store
        results_dir = os.path.join('results', 'tpoisoning', 'clean-labels',
                                   args.attmode, store_dir)
        if not os.path.exists(results_dir): os.makedirs(results_dir)
        result_file = os.path.join(results_dir,
                                   'attack_w_{}.csv'.format(poison_tkey))
        print(' : [Task: {}][Target: {}] Store the result to [{}]'.format(
            task_num, poison_tkey, result_file))
        """
            Skip the current data, based on the target index
        """
        if (args.fromtidx == poison_tkey): skip_data = False
        if skip_data:
            print(' : [Task: {}][Target: {}] Skip this...'.format(
                task_num, poison_tkey))
            continue
        """
            Load the attack data
        """
        # : load the dataset
        (x_train, y_train), (x_test, y_test) =  \
            datasets.define_dataset(args.dataset, args.datapth)

        # : bound check for the inputs
        assert (x_train.min() == 0.) and (x_train.max() == 1.) \
            and (x_test.min() == 0.) and (x_test.max() == 1.)
        print (' : [Task: {}][Target: {}] Load the dataset [{}] from [{}]'.format( \
            task_num, poison_tkey, args.dataset, args.datapth))

        # : load the poisons
        (x_poisons, y_poisons), (x_target, y_target) = \
            datasets.load_poisons(each_data, x_test, y_test, sort=True)

        # : existence of the poisons
        if (x_poisons.size == 0) or (y_poisons.size == 0):
            print(
                ' : [Task: {}][Target: {}] Doesn\'t have poisons, skip'.format(
                    task_num, poison_tkey))
            continue

        # : bound check for the poisons
        assert (x_train.min() == 0.) and (x_train.max() == 1.) \
            and (x_test.min() == 0.) and (x_test.max() == 1.)
        print(' : [Task: {}][Target: {}] Load the poisons from [{}]'.format(
            task_num, poison_tkey, each_data))
        """
            Blend poisons and re-train each model
            1) oneshot: consider only one poison at a time
            2) multipoison: consider multiple poisons at a time (0th ~ nth)
        """
        # : condition to stop attack (once the attacker successes on a target)
        stop_attack = False

        # : decide how many poisons to use
        for pidx in range(len(x_poisons)):

            # :: skip, if the attack has been successful
            if stop_attack: continue

            # :: set the poison index
            poison_index = pidx + 1

            # :: consider max. the number of poisons specified
            if (args.poisonn > 0) \
                and (poison_index > args.poisonn):
                print (' : [Task: {}][Target: {}][{:>3}] Stop, # of poisons to consider is [{}]'.format( \
                    task_num, poison_tkey, poison_index, args.poisonn))
                break

            # :: skip the current poison, based on the poison index
            if (args.frompidx == poison_index): skip_poison = False
            if skip_poison:
                print(' : [Task: {}][Target: {}][{:>3}] Skip this poison...'.
                      format(task_num, poison_tkey, poison_index))
                continue

            # :: cleanup directories in the previous runs
            _cleanup_directories(results_dir, poison_tkey)

            # :: copy the checkpoint to the result dir.
            result_pmodel = os.path.join(
                results_dir, '{}_{}'.format(poison_tkey, poison_index))
            shutil.copytree(args.netpath, result_pmodel)
            time.sleep(_wait_ops)  # delay for copying files
            print (' : [Task: {}][Target: {}][{:>3}] Copy the clean model to [{}]'.format( \
                task_num, poison_tkey, poison_index, result_pmodel))

            # :: tensorflow runtime configuration
            cur_rconf = tf.estimator.RunConfig(
                tf_random_seed=_rand_fix,
                keep_checkpoint_max=1,  # 0 means all, do not use
            )

            # :: extract the basic information from the model location
            mtokens = args.netpath.split('/')
            mtokens = mtokens[2].split('_')
            batch_size = int(mtokens[2])
            epochs = int(mtokens[3])
            if ('purchases' == args.dataset):
                epochs = epochs // 2
            else:
                epochs = 20 if (epochs > 20) else (epochs // 2)
            learn_rate = float(mtokens[4])

            # :: load the pre-trained model
            if not args.privacy:
                cur_model = models._load_vanilla_model( \
                    cur_rconf, \
                    args.dataset, args.datapth, args.network, result_pmodel, \
                    batch_size, learn_rate)
                print (' : [Task: {}][Target: {}][{:>3}] Load the '.format(task_num, poison_tkey, poison_index) + \
                        'pre-trained vanilla model from [{}]'.format(result_pmodel))
            else:
                # :: extract the extra information about privacy
                epsilon = float(mtokens[5])
                delta = float(mtokens[6])
                norm_clip = float(mtokens[7])
                noises = float(mtokens[8])

                # :: load the privacy model
                cur_model = models._load_dp_model( \
                    cur_rconf, \
                    args.dataset, args.datapth, x_train.shape[0], args.network, result_pmodel, \
                    batch_size, learn_rate, epsilon, delta, norm_clip, noises)
                print (' : [Task: {}][Target: {}][{:>3}] Load the '.format(task_num, poison_tkey, poison_index) + \
                        'pre-trained privacy model from [{}]'.format(result_pmodel))

            # :: blend poisons into the training data
            if 'oneshot' == args.attmode:
                cur_x_train = np.concatenate(
                    (x_train, x_poisons[poison_index - 1:poison_index]),
                    axis=0)
                cur_y_train = np.concatenate(
                    (y_train, y_poisons[poison_index - 1:poison_index]),
                    axis=0)
            elif 'multipoison' == args.attmode:
                cur_x_train = np.concatenate(
                    (x_train, x_poisons[:poison_index]), axis=0)
                cur_y_train = np.concatenate(
                    (y_train, y_poisons[:poison_index]), axis=0)
            else:
                assert False, ('Error: unknown attack mode - {}'.format(
                    args.attmode))

            # :: create the estimator functions
            cur_train_fn = numpy_input_fn(x={'x': cur_x_train},
                                          y=cur_y_train,
                                          batch_size=batch_size,
                                          num_epochs=epochs,
                                          shuffle=True)
            cur_test_fn = numpy_input_fn(x={'x': x_test},
                                         y=y_test,
                                         num_epochs=1,
                                         shuffle=False)
            cur_target_fn = numpy_input_fn(x={'x': x_target},
                                           y=y_target,
                                           num_epochs=1,
                                           shuffle=False)

            # :: condition to remove the retrained model
            remove_pmodel = True

            # :: to compare the probability changes from the oracle
            oracle_predict = cur_model.predict(input_fn=cur_target_fn)
            oracle_predict = list(oracle_predict)[0]
            oracle_bas_prob = oracle_predict['probabilities'][args.b_class]
            oracle_tar_prob = oracle_predict['probabilities'][args.t_class]

            # :: re-train the network with the poisoning data
            cur_steps_per_epoch = cur_x_train.shape[0] // batch_size
            for cur_epoch in range(1, epochs + 1):

                # ::: train for an epoch
                cur_model.train( \
                    input_fn=cur_train_fn, steps=cur_steps_per_epoch)

                # ::: evaluate for one instance
                cur_predicts = cur_model.predict(input_fn=cur_target_fn)
                cur_predicts = list(cur_predicts)[0]
                cur_probs = cur_predicts['probabilities']
                cur_bas_prob = cur_predicts['probabilities'][args.b_class]
                cur_tar_prob = cur_predicts['probabilities'][args.t_class]

                # ::: check if we have the successful attack
                if (cur_predicts['classes'] == args.t_class):

                    # > validate the re-trained model
                    cur_predicts = cur_model.evaluate(input_fn=cur_test_fn)
                    cur_accuracy = cur_predicts['accuracy']

                    # > only compute the accuracy (when no privacy)
                    if not args.privacy:
                        # > store the data to a file
                        cur_result = [[poison_tkey, poison_index, \
                                        oracle_bas_prob, oracle_tar_prob, \
                                        cur_bas_prob, cur_tar_prob, \
                                        cur_epoch, cur_accuracy]]
                        io.store_to_csv(result_file, cur_result, mode='a')

                        # > notify
                        print (' : [Task: {}][Target: {}][{:>3}] epoch {} - attack success!'.format( \
                            task_num, poison_tkey, poison_index, cur_epoch))
                        print ('  - Prob [3:{:.4f} / 4:{:.4f}], acc [{:.4f}]'.format( \
                            cur_bas_prob, cur_tar_prob, cur_accuracy), flush=True)

                    # > compute the epsilon (when privacy)
                    else:
                        cur_epsilon = models.compute_epsilon( \
                            cur_epoch * cur_steps_per_epoch, \
                            cur_x_train.shape[0], batch_size, delta, noises)

                        # > store the data to a file
                        cur_result = [[poison_tkey, poison_index, \
                                        oracle_bas_prob, oracle_tar_prob, \
                                        cur_bas_prob, cur_tar_prob, \
                                        cur_epoch, cur_accuracy, cur_epsilon]]
                        io.store_to_csv(result_file, cur_result, mode='a')

                        # > notify
                        print (' : [Task: {}][Target: {}][{:>3}] epoch {} - attack success!'.format( \
                            task_num, poison_tkey, poison_index, cur_epoch))
                        print ('  - Prob [3:{:.4f} / 4:{:.4f}], acc [{:.4f}], eps [{:.4f} <- {:.4f} + {:.4f}]'.format( \
                            cur_bas_prob, cur_tar_prob, cur_accuracy, cur_epsilon+epsilon, epsilon, cur_epsilon), flush=True)

                    # > stop the attack process (retain model and stop)
                    remove_pmodel = False
                    stop_attack = True
                    break

                # ::: if not successful
                else:
                    if (len(cur_probs) > 10): cur_probs = cur_probs[:10]
                    print (' : [Task: {}][Target: {}][{:>3}] epoch {} - attack fail, keep going - Prob [3:{:.4f} / 4:{:.4f}] - {}'.format( \
                        task_num, poison_tkey, poison_index, cur_epoch, cur_bas_prob, cur_tar_prob, cur_probs), flush=True)
                # ::: end if (cur_accuracy...

            # :: end for epoch...

            # :: remove model if it's true
            if remove_pmodel:
                shutil.rmtree(result_pmodel, ignore_errors=True)
                time.sleep(_wait_ops)
                print(' : [Task: {}][Target: {}] Attack failed, remove [{}]'.
                      format(task_num, poison_tkey, result_pmodel))

            # :: reset the tensorflow graph for another run
            tf.reset_default_graph()

        # : end for pidx...
    # end for aidx...

    print(' : [Task: {}] finished'.format(task_num))