Example No. 1
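# The snippet below assumes module-level imports and globals that are not shown
# on this page. A hedged reconstruction of what it likely needs (the repo-local
# helpers named in the trailing comment are assumptions, not standard libraries):
import numpy as np
import keras
from keras import backend as K
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.cluster import KMeans
from cleverhans.attacks import FastGradientMethod
from cleverhans.utils_keras import KerasModelWrapper
# Also assumed: repo-local helpers AdditionalValidationSets, TimeHistory,
# ShowErrorsCallback, uniform_select, merge_histories, and the module-level
# globals `sign` (the watermark bit string) and `num_classes`.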
    def _blackmarks_embed(
            model,
            x_train,
            y_train,
            x_test,
            y_test,
            sess,
            eps=0.25,
            batch_size=128,
            epochs=5,
            min_delta=0.002,  # Minimal improvement per step
            patience=2,
            wm_epochs=5,
            fine_tuning=True,
            load_wm_model_func=None,
            weight_path='../../tmp/mnist_cnn_weights.hdf5',
            retrain=True):
        if retrain:
            try:
                model.load_weights(weight_path)
            except Exception as e:
                print(e)
                print('Cannot find pretrained weight. Start training...')
                checkpoint = ModelCheckpoint(weight_path,
                                             monitor='val_loss',
                                             verbose=1,
                                             save_best_only=True,
                                             mode='min',
                                             save_weights_only=True)

                model.fit(x_train,
                          y_train,
                          batch_size=batch_size,
                          epochs=epochs,
                          verbose=1,
                          validation_data=(x_test, y_test),
                          callbacks=[checkpoint])
        # Step 1: Derive the key length from the watermark signature `sign`
        # (a module-level bit sequence) and keep 100 * key_length training
        # samples for clustering the penultimate-layer activations.
        key_length = len(list(sign))
        x_train = x_train[:key_length * 100]
        functor = K.function([model.input, K.learning_phase()],
                             [model.layers[-2].output])
        activation_out = functor([x_train, 1.])[0]
        activation_out = np.mean(activation_out, axis=0)
        activation_out = activation_out.reshape(-1, 1)
        kmeans = KMeans(n_clusters=2).fit(activation_out)
        clusters = kmeans.labels_

        cluster_one = np.where(clusters == 1)[0]
        cluster_zero = np.where(clusters == 0)[0]
        print(clusters)
        print(cluster_one)
        print(cluster_zero)
        # Step 2: Assign each training input to a cluster and give it an
        # adversarial target label drawn from the opposite cluster.
        preds_op = model(model.inputs[0])
        preds_one_hot = sess.run(preds_op,
                                 feed_dict={model.inputs[0]: x_train})
        preds = np.argmax(preds_one_hot, axis=1)
        preds_cluster = np.isin(preds, cluster_one).astype('int')
        preds_target = [
            uniform_select(cluster_one)
            if i == 0 else uniform_select(cluster_zero)
            for i in list(preds_cluster)
        ]
        print(preds_target)
        preds_target_one_hot = keras.utils.to_categorical(
            preds_target, num_classes)
        # Step 3: Generate adversarial examples
        wrap = KerasModelWrapper(model)
        fgsm = FastGradientMethod(wrap)
        fgsm_params = {'eps': eps, 'y_target': preds_target_one_hot}
        adv_x_op = fgsm.generate(model.inputs[0], **fgsm_params)
        adv_x = sess.run(adv_x_op, feed_dict={model.inputs[0]: x_train})

        history_wm = AdditionalValidationSets([(adv_x, preds_one_hot,
                                                'watermark')])
        time_hist = TimeHistory()
        es = EarlyStopping(
            monitor='acc',
            mode='max',
            min_delta=min_delta,
            patience=patience,
            restore_best_weights=True)  # Stop once accuracy improves by less than min_delta
        additional_callbacks = [
            ShowErrorsCallback(dataset=(adv_x, preds_one_hot),
                               prefix="Embed Trigger (Train)")
        ]
        callbacks = []
        if fine_tuning:
            callbacks = [time_hist, *additional_callbacks, history_wm, es]
        model.fit(adv_x,
                  preds_one_hot,
                  batch_size=batch_size,
                  epochs=wm_epochs,
                  verbose=1,
                  validation_data=(x_test, y_test),
                  callbacks=callbacks)

        # print("Fine tuning finished, start retraining...")
        # x_train = np.vstack((x_train, adv_x))
        # y_train = np.vstack((y_train, preds_one_hot))
        # order = np.arange(x_train.shape[0])
        # np.random.shuffle(order)
        # x_train = x_train[order]
        # y_train = y_train[order]
        # model.fit(x_train,
        #           y_train,
        #           batch_size=batch_size,
        #           epochs=wm_epochs,
        #           verbose=1,
        #           validation_data=(x_test, y_test),
        #           callbacks=[time_hist, *additional_callbacks, history_wm, es])
        history_wm.history = merge_histories(
            [history_wm, time_hist, *additional_callbacks, es])

        # Step 4: Select the final watermark keys.
        # Keep only keys that the non-watermarked model no longer classifies as
        # the original prediction while the watermarked model still does.
        model_no_wm = load_wm_model_func()
        model_no_wm.load_weights(weight_path)
        pred_no_wm_one_hot_op = model_no_wm(model.inputs[0])
        pred_wm_one_hot_op = model(model.inputs[0])
        pred_no_wm_one_hot, pred_wm_one_hot = sess.run(
            [pred_no_wm_one_hot_op, pred_wm_one_hot_op],
            feed_dict={model.inputs[0]: adv_x})
        pred_no_wm = np.argmax(pred_no_wm_one_hot, axis=1)
        pred_wm = np.argmax(pred_wm_one_hot, axis=1)
        key_candidate_cond1 = np.where(pred_no_wm != preds)[0]
        key_candidate_cond2 = np.where(pred_wm == preds)[0]
        key_candidate = np.intersect1d(key_candidate_cond1,
                                       key_candidate_cond2)
        print(key_candidate)
        wm_keys = adv_x[key_candidate]
        wm_keys_cluster = preds_cluster[key_candidate]
        wm_keys_one = np.where(wm_keys_cluster == 1)[0]
        wm_keys_zero = np.where(wm_keys_cluster == 0)[0]
        print(wm_keys_one[:np.sum(sign)])
        print(wm_keys_zero[:key_length - np.sum(sign)])

        acc1 = 0
        acc2 = 0
        embeded_keys = []
        for i in list(sign):
            if (i == 1):
                embeded_keys.append(wm_keys[wm_keys_one[acc1]])
                acc1 = acc1 + 1
            else:
                embeded_keys.append(wm_keys[wm_keys_zero[acc2]])
                acc2 = acc2 + 1
        embeded_keys = np.array(embeded_keys)
        print("#########HERE##########")
        print(embeded_keys.shape)
        print("#######################")

        cluster = (cluster_zero, cluster_one)

        trigger = {}
        trigger["keys"] = (embeded_keys,
                           keras.utils.to_categorical(sign, num_classes))
        trigger["clusters"] = (cluster_zero, cluster_one, sign)
        # need to change embeded_keys to history
        return model, history_wm, trigger
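
# The BlackMarks embedding above relies on a repo-local helper `uniform_select`
# that is not shown here. A minimal sketch of what it presumably does (draw one
# class index uniformly at random from a cluster); the exact behaviour is an
# assumption:
import numpy as np

def uniform_select(candidates):
    # Hypothetical helper: return one entry of `candidates`, chosen uniformly.
    return int(np.random.choice(candidates))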
Example No. 2
def adversarial_whitebox(
        load_dataset_func,  # Which dataset to choose. Should return training and testing data
        dataset_label,  # Label of the dataset (for caching)
        load_wm_model_func,  # Model for wm_embedding (needs params {"reg","optimizer","freeze_first_layers"})
        wm_embed_func,
        owner_data_size=30000,
        total_owner_data_size=30000,
        attacker_data_size=15000,
        attacker_data_size_reg=10000,
        total_attacker_data_size=15000,
        epochs_reg=30,
        epochs_surr=10,
        early_stopping_wm_reg=0.2,  # At which watermark accuracy to stop the whitebox attack
        patience_reg=2,
        lr_surr=0.001,  # Learning rate for the surrogate model
        freeze_first_layers=0,  # How many layers to freeze for surrogate model
        reg_whitebox=0.003,
        reg_surr=0.0,
        batchsize_embed=64,
        batchsize_reg=64,
        batchsize_surr=64,
        wm_class=5,
        cache_embed_wm=None,
        cache_reg_model=None,
        cache_surr_model=None,
        verbose=False,
        fine_tuning=True,
        weight_path='../../tmp/mnist_cnn_weights.hdf5',
        cluster=False):
    sess = tf.Session()
    K.set_session(sess)

    cache_embed_wm, cache_reg_model, cache_surr_model, = concat_labels_if_not_none(
        [cache_embed_wm, cache_reg_model, cache_surr_model], dataset_label)

    (all_x, all_y), test_data = load_dataset_func()

    if owner_data_size + attacker_data_size > len(all_x):
        raise RuntimeError(
            "Whitebox Attack data error! Trying to consume more training data than there is available!"
            " {}>{}".format(owner_data_size + attacker_data_size, len(all_x)))

    owner_data, owner_data_from_cache = augment_data(
        set_to_augment=(all_x[:owner_data_size], all_y[:owner_data_size]),
        prefix=dataset_label,
        total_size=total_owner_data_size,
        use_cached_training_data="owner_data" + str(total_owner_data_size) +
        str(total_attacker_data_size),
        verbose=verbose)
    attacker_data, attacker_data_from_cache = augment_data(
        set_to_augment=(all_x[owner_data_size:owner_data_size +
                              attacker_data_size],
                        all_y[owner_data_size:owner_data_size +
                              attacker_data_size]),
        prefix=dataset_label,
        total_size=total_attacker_data_size,
        use_cached_training_data="attacker_data" + str(total_owner_data_size) +
        str(total_attacker_data_size),
        verbose=verbose)

    # Make sure to always regenerate both files if necessary
    if owner_data_from_cache != attacker_data_from_cache:
        raise RuntimeError(
            "Whitebox Attack data error! Sets are not mutually exclusive, please delete conflicting "
            "file ending in '{}'!".format(
                str(total_owner_data_size) + str(total_attacker_data_size)))

    wm_model, history_embed, trigger = wm_embed_func(
        load_wm_model_func(),
        owner_data[0],
        owner_data[1],
        test_data[0],
        test_data[1],
        sess,
        fine_tuning=fine_tuning,
        load_wm_model_func=load_wm_model_func)

    pred_y = wm_model.predict(attacker_data[0])
    attacker_data = attacker_data[0], pred_y
    attacker_data_reg = (attacker_data[0][:attacker_data_size_reg],
                         attacker_data[1][:attacker_data_size_reg])

    additional_callbacks2 = [
        ShowErrorsCallback(dataset=trigger["keys"], prefix="WB Trigger")
    ]
    surr_model_reg, reg_history = whitebox_attack(
        wm_model=wm_model,
        load_model_func=load_wm_model_func,
        load_func_kwargs={"reg": reg_whitebox},
        load_func_kwargs2={
            "reg": reg_surr,
            "optimizer": RMSprop(lr=lr_surr),
            "freeze_first_layers": freeze_first_layers
        },
        trigger_set=trigger,
        train_data=attacker_data_reg,
        test_data=test_data,
        batchsize=batchsize_reg,
        epochs_reg=epochs_reg,
        additional_callbacks=additional_callbacks2,
        early_stopping_wm=early_stopping_wm_reg,  # When to stop
        patience=patience_reg,
        cache_surr_model=cache_reg_model,
        verbose=False,
        cluster=cluster)

    additional_callbacks_surr = [
        ShowErrorsCallback(dataset=trigger["keys"],
                           prefix="BB Trigger (Train)")
    ]

    # Randomized blackbox: uncomment the block below to randomly relabel a
    # small fraction of the attacker data before the surrogate attack.
    # random_selection = np.random.random_sample(attacker_data_size)
    # random_selection = (random_selection < 0.005).astype('int64')
    # random_target = np.random.randint(10, size=sum(random_selection))
    # random_index = np.where(random_selection == 1)[0]
    # attacker_data[1][random_index] = keras.utils.to_categorical(
    #     random_target, num_classes)
    # print(sum(random_selection), " attacker data is twisted...")

    surr_model, history_surr = blackbox_attack(
        surrogate_model=surr_model_reg,
        epochs_surr=epochs_surr,
        train_data=attacker_data,
        trigger_set=trigger,
        test_data=test_data,
        batchsize=batchsize_surr,
        additional_callbacks=additional_callbacks_surr,
        cache_surr_model=cache_surr_model,
        verbose=False,
        cluster=cluster)

    # After the black-box attack, try to embed the watermark again to further
    # reduce the old watermark retention.
    print("####################################################")
    print("Watermark retention BEFORE embeding new watermark...")
    print(surr_model.evaluate(trigger["keys"][0], trigger["keys"][1]))
    print(surr_model.evaluate(test_data[0], test_data[1]))
    print("####################################################")

    surr_model, history_embed, _ = wm_embed_func(
        surr_model,
        attacker_data[0],
        attacker_data[1],
        test_data[0],
        test_data[1],
        sess,
        fine_tuning=fine_tuning,
        load_wm_model_func=load_wm_model_func,
        retrain=False)

    print("####################################################")
    print("Watermark retention AFTER embeding new watermark...")
    print(surr_model.evaluate(trigger["keys"][0], trigger["keys"][1]))
    print(surr_model.evaluate(test_data[0], test_data[1]))
    print("####################################################")

    baseline_model1 = load_wm_model_func()
    baseline_model1.load_weights(weight_path)

    baseline_model2 = load_wm_model_func()
    baseline_model2.fit(attacker_data[0],
                        attacker_data[1],
                        batch_size=64,
                        epochs=5,
                        verbose=1,
                        validation_data=(test_data[0], test_data[1]))

    baseline_eval1 = baseline_model1.evaluate(trigger["keys"][0],
                                              trigger["keys"][1])[0]
    baseline_eval2 = baseline_model2.evaluate(trigger["keys"][0],
                                              trigger["keys"][1])[0]
    print("This is the baseline:", baseline_eval1)
    print("This is the baseline:", baseline_eval2)
    print(baseline_model1.evaluate(owner_data[0], owner_data[1]))

    baseline = (baseline_eval1 / 100, baseline_eval2 / 100)

    return surr_model, (history_embed, reg_history, history_surr, baseline)
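
# A hedged usage sketch: `load_mnist` and `load_mnist_cnn` below are
# hypothetical loader/factory functions, not part of the original example.
#
# surr_model, histories = adversarial_whitebox(
#     load_dataset_func=load_mnist,
#     dataset_label="mnist",
#     load_wm_model_func=load_mnist_cnn,
#     wm_embed_func=_blackmarks_embed,
#     cache_surr_model="adv_wb_surr")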
Example No. 3
def adversarial_blackbox(
        load_dataset_func,      # Which dataset to choose. Should return training and testing data
        dataset_label,          # Label of the dataset (for caching)
        load_wm_model_func,     # Model specification for owners model
        wm_embed_func,          # Watermark embedding function
        owner_data_size=35000,  # Data to load from repository
        total_owner_data_size=35000,        # Total data (with augmentation)
        attacker_data_size=25000,           # Data to load from repository
        total_attacker_data_size=25000,     # Total data (with augmentation)
        falsify_attacker_data=0.05,    # Ratio of labels to re-label randomly
        epochs_wm=5,            # Max number of epochs for owners model
        batchsize_wm=64,        # Batchsize for owners model
        epochs_surr=20,         # Max number of epochs for blackbox attack model
        batchsize_surr=64,      # Batch size for blackbox attack
        cache_surr_model=None,  # Whether to save the model (path required)
        weight_path='../../tmp/mnist_cnn_weights.hdf5',
        fine_tuning=True,
        cluster=False,
        rand_bb=False,
        verbose=False):
    """ Blackbox attack on adversarial embedding
    """
    sess = tf.Session()
    K.set_session(sess)

    # Load the owners model
    surrogate_model = load_wm_model_func()
    (all_x, all_y), test_data = load_dataset_func()

    if owner_data_size + attacker_data_size > len(all_x):
        raise RuntimeError(
            "Blackbox Attack data error! Trying to consume more training data than there is available!"
            " {}>{}".format(owner_data_size + attacker_data_size, len(all_x)))

    # Load owner and attacker data and assure they are mutually exclusive!
    owner_data, loaded_owner_from_cache = augment_data(
        set_to_augment=(all_x[:owner_data_size], all_y[:owner_data_size]),
        prefix=dataset_label,
        total_size=total_owner_data_size,
        use_cached_training_data="owner_data" + str(total_owner_data_size) +
        str(total_attacker_data_size),
        verbose=verbose)
    attacker_data, loaded_attacker_from_cache = augment_data(
        set_to_augment=(all_x[owner_data_size:owner_data_size +
                              attacker_data_size],
                        all_y[owner_data_size:owner_data_size +
                              attacker_data_size]),
        prefix=dataset_label,
        total_size=total_attacker_data_size,
        use_cached_training_data="attacker_data" + str(total_owner_data_size) +
        str(total_attacker_data_size),
        verbose=verbose)

    if loaded_owner_from_cache != loaded_attacker_from_cache:
        raise RuntimeError(
            "Blackbox Attack data error! One set was loaded from cache and the other wasn't. Cannot ensure "
            "that sets don't overlap.  Please delete conflicting file ending in '{}'!".format(
                str(total_owner_data_size) + str(total_attacker_data_size)))

    # Create the owners model with the embedded watermark
    wm_model, history_embed, trigger = wm_embed_func(
        load_wm_model_func(),
        owner_data[0],
        owner_data[1],
        test_data[0],
        test_data[1],
        sess,
        fine_tuning=fine_tuning,
        load_wm_model_func=load_wm_model_func)

    # Label the attackers data
    pred_y = wm_model.predict(attacker_data[0])
    attacker_data = attacker_data[0], pred_y

    additional_callbacks = [
        ShowErrorsCallback(dataset=trigger["keys"], prefix="BB Trigger")
    ]

    # Randomly relabel a falsify_attacker_data fraction of the attacker data
    random_selection = np.random.random_sample(attacker_data_size)
    random_selection = (random_selection < falsify_attacker_data).astype('int64')
    random_target = np.random.randint(10, size=sum(random_selection))
    random_index = np.where(random_selection == 1)[0]
    attacker_data[1][random_index] = keras.utils.to_categorical(random_target, num_classes)

    print("##############################################")
    print("########### Starting Blackbox Attack #########")
    # Start the blackbox attack
    surr_model, history_surr = blackbox_attack(
        surrogate_model=surrogate_model,
        epochs_surr=epochs_surr,
        trigger_set=trigger,
        train_data=MNISTSequence(attacker_data[0], attacker_data[1],
                                 batchsize_surr) if rand_bb else attacker_data,
        test_data=test_data,
        batchsize=batchsize_surr,
        additional_callbacks=additional_callbacks,
        cache_surr_model=cache_surr_model,
        verbose=False,
        cluster=cluster)

    # After the black-box attack, try to embed the watermark again to further
    # reduce the old watermark retention.
    print("####################################################")
    print("Watermark retention BEFORE embeding new watermark...")
    print(surr_model.evaluate(trigger["keys"][0], trigger["keys"][1]))
    print(surr_model.evaluate(test_data[0], test_data[1]))
    print("####################################################")

    surr_model, history_embed, _ = wm_embed_func(
        surr_model,
        attacker_data[0],
        attacker_data[1],
        test_data[0],
        test_data[1],
        sess,
        fine_tuning=fine_tuning,
        load_wm_model_func=load_wm_model_func,
        retrain=False)

    print("####################################################")
    print("Watermark retention AFTER embeding new watermark...")
    print(surr_model.evaluate(trigger["keys"][0], trigger["keys"][1]))
    print(surr_model.evaluate(test_data[0], test_data[1]))
    print("####################################################")

    baseline_model1 = wm_model

    baseline_model2 = load_wm_model_func()
    baseline_model2.fit(
        attacker_data[0],
        attacker_data[1],
        batch_size=64,
        epochs=5,
        verbose=1,
        validation_data=(test_data[0], test_data[1]))

    baseline_eval1 = baseline_model1.evaluate(trigger["keys"][0],
                                              trigger["keys"][1])[1]
    baseline_eval2 = baseline_model2.evaluate(trigger["keys"][0],
                                              trigger["keys"][1])[1]
    baseline = (baseline_eval1 * 100, baseline_eval2 * 100)

    return surr_model, (history_embed, history_surr, baseline)
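
# MNISTSequence (used when rand_bb=True) is not shown in this example. A
# minimal sketch, assuming it is a keras.utils.Sequence that reshuffles the
# attacker data between epochs:
import numpy as np
import keras

class MNISTSequence(keras.utils.Sequence):
    def __init__(self, x, y, batch_size):
        self.x, self.y, self.batch_size = x, y, batch_size
        self.indices = np.arange(len(x))

    def __len__(self):
        # Number of batches per epoch.
        return int(np.ceil(len(self.x) / self.batch_size))

    def __getitem__(self, idx):
        batch = self.indices[idx * self.batch_size:(idx + 1) * self.batch_size]
        return self.x[batch], self.y[batch]

    def on_epoch_end(self):
        # Reshuffle so every epoch sees the data in a new order.
        np.random.shuffle(self.indices)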
def asiaccs_blackbox(
        load_dataset_func,  # Which dataset to choose. Should return training and testing data
        dataset_label,  # Label of the dataset (for caching)
        model,  # Model specification for wm_embedding
        surrogate_model,  # Model for blackbox attack
        wm_type='gaussian',  # logo or gaussian for wm embedding
        owner_data_size=25000,  # Data size of the owner
        total_owner_data_size=100000,  # Total data size of the owner with augmentation
        attacker_data_size=25000,  # Data size of the attacker
        total_attacker_data_size=100000,  # Total data size of the attacker with augmentation
        key_length=10000,  # How many keys to use for the embedding
        key_length_test=1000,  # How many keys to use for the testing
        epochs_embed=7,  # Train owners model
        epochs_surr=20,  # Train attackers model
        batchsize_embed=64,
        batchsize_surr=64,
        wm_class=5,
        cache_embed_wm=None,  # Filepath to store owners model & history
        cache_surr_model=None,  # Filepath to store attacker model & history
        verbose=True):
    """ Generates a watermarked surrogate model with the ASIACCS watermarking scheme
    """
    if verbose:
        print("[1/5] ({}) Blackbox Attack: Loading {} data".format(
            wm_type, dataset_label))
        print("      Owner data: {} Attacker Data: {}".format(
            total_owner_data_size, total_attacker_data_size))

    cache_embed_wm, cache_surr_model, = concat_labels_if_not_none(
        [cache_embed_wm, cache_surr_model], dataset_label)

    (all_x, all_y), test_data = load_dataset_func()

    if owner_data_size + attacker_data_size > len(all_x):
        raise RuntimeError(
            "Blackbox Attack data error! Trying to consume more training data than there is available!"
            " {}>{}".format(owner_data_size + attacker_data_size, len(all_x)))

    # Assure owner data and attacker data are mutually exclusive!
    owner_data, owner_data_from_cache = augment_data(
        set_to_augment=(all_x[:owner_data_size], all_y[:owner_data_size]),
        prefix=dataset_label,
        total_size=total_owner_data_size,
        use_cached_training_data="owner_data" + str(total_owner_data_size) +
        str(total_attacker_data_size),
        verbose=verbose)
    attacker_data, attacker_data_from_cache = augment_data(
        set_to_augment=(all_x[owner_data_size:owner_data_size +
                              attacker_data_size],
                        all_y[owner_data_size:owner_data_size +
                              attacker_data_size]),
        prefix=dataset_label,
        total_size=total_attacker_data_size,
        use_cached_training_data="attacker_data" + str(total_owner_data_size) +
        str(total_attacker_data_size),
        verbose=verbose)

    # Make sure to always regenerate both files if necessary
    if owner_data_from_cache != attacker_data_from_cache:
        raise RuntimeError(
            "Blackbox Attack data error! Sets are not mutually exclusive, please delete conflicting "
            "file ending in '{}'!".format(
                str(total_owner_data_size) + str(total_attacker_data_size)))

    if verbose:
        print(
            "[2/5] Generating ASIACCS watermarked images: Train({}) Test({})".
            format(key_length, key_length_test))

    trigger = load_wm_images_asiaccs(type=wm_type,
                                     dataset=owner_data,
                                     wm_class=wm_class,
                                     n_size=key_length)
    trigger_test = load_wm_images_asiaccs(type=wm_type,
                                          dataset=test_data,
                                          wm_class=wm_class,
                                          n_size=key_length_test)

    if verbose:
        print("[3/5] Training the original model and embedding the watermark")

    additional_callbacks = [
        AdditionalValidationSets([(trigger_test[0], trigger_test[1],
                                   'watermark_new')]),
        ShowErrorsCallback(dataset=trigger, prefix="Embed Trigger (Train)"),
        ShowErrorsCallback(dataset=trigger_test, prefix="Embed Trigger (Test)")
    ]
    wm_model, history_embed, trigger = embed_wm(
        model=model,
        epochs=epochs_embed,
        key_length=key_length,
        train_data=owner_data,
        trigger_set=trigger,
        test_data=test_data,
        wm_boost_factor=1,
        batchsize=batchsize_embed,
        additional_callbacks=additional_callbacks,
        cache_embed_wm=cache_embed_wm,
        verbose=False)
    if verbose:
        print("    Evaluating accuracy on attacker data...",
              end="",
              flush=True)
        acc_on_attacker_data = wm_model.evaluate(attacker_data[0],
                                                 attacker_data[1])
        print(
            "    Done! Original discriminators accuracy on attackers data: {}".
            format(acc_on_attacker_data[1]))
        print("[4/5] Labeling the attackers data with the original model")

    pred_y = wm_model.predict(attacker_data[0])
    attacker_data = attacker_data[0], pred_y

    print("[5/5] Training the surrogate model")
    additional_callbacks_surr = [
        AdditionalValidationSets([(trigger_test[0], trigger_test[1],
                                   'watermark_new')]),
        ShowErrorsCallback(dataset=trigger, prefix="BB Trigger (Train)"),
        ShowErrorsCallback(dataset=trigger_test, prefix="BB Trigger (Test)")
    ]
    surr_model, history_surr = blackbox_attack(
        surrogate_model=surrogate_model,
        epochs_surr=epochs_surr,
        train_data=attacker_data,
        trigger_set=trigger,
        test_data=test_data,
        batchsize=batchsize_surr,
        additional_callbacks=additional_callbacks_surr,
        cache_surr_model=cache_surr_model,
        verbose=False)

    return surr_model, (history_embed, history_surr)
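
# load_wm_images_asiaccs is a repo-local helper that is not shown here. A
# hedged sketch of the idea (images assumed to be normalized to [0, 1]; the
# exact perturbation and signature are assumptions):
import numpy as np
import keras

def load_wm_images_asiaccs_sketch(type, dataset, wm_class, n_size,
                                  num_classes=10):
    # Perturb n_size images with Gaussian noise or a bright "logo" patch and
    # relabel all of them to the watermark class wm_class.
    x = np.array(dataset[0][:n_size], dtype='float32')
    if type == 'gaussian':
        x = x + np.random.normal(0.0, 0.1, size=x.shape)
    else:  # 'logo' stand-in: a fixed bright square in the top-left corner
        x[:, :6, :6, ...] = 1.0
    x = np.clip(x, 0.0, 1.0)
    y = keras.utils.to_categorical(np.full(n_size, wm_class), num_classes)
    return x, y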
def asiaccs_whitebox(
        load_dataset_func,  # Which dataset to choose. Should return training and testing data
        dataset_label,  # Label of the dataset (for caching)
        load_wm_model_func,  # Model for wm_embedding (needs params {"reg","optimizer","freeze_first_layers"})
        wm_type='gaussian',  # logo or gaussian for wm embedding
        owner_data_size=35000,
        total_owner_data_size=100000,
        key_length=10000,
        key_length_test=1000,
        attacker_data_size=15000,
        attacker_data_size_reg=10000,
        total_attacker_data_size=15000,
        epochs_embed=1,
        epochs_reg=1,
        epochs_surr=1,
        early_stopping_wm_reg=0.1,  # At which watermark accuracy to stop the whitebox attack
        patience_reg=0,
        lr_surr=0.001,  # Learning rate for the surrogate model
        freeze_first_layers=0,  # How many layers to freeze for surrogate model
        reg_whitebox=0.0,
        reg_surr=0.0,
        batchsize_embed=64,
        batchsize_reg=64,
        batchsize_surr=64,
        wm_class=5,
        cache_embed_wm=None,
        cache_reg_model=None,
        cache_surr_model=None,
        verbose=True):
    """ Generates two mutually exclusive data sets for the owner and the attacker. Trains a watermarked model for the
        owner with the ASIACCS embedding. Then runs a regularization and a surrogate model attack with the attackers
        data.
    """
    if verbose:
        print("[1/6] ASIACCS ({}) Whitebox Attack: Loading {} data".format(
            wm_type, dataset_label))
        print("      Owner data: {} Attacker Data: {}".format(
            total_owner_data_size, total_attacker_data_size))
    cache_embed_wm, cache_reg_model, cache_surr_model, = concat_labels_if_not_none(
        [cache_embed_wm, cache_reg_model, cache_surr_model], dataset_label)

    (all_x, all_y), test_data = load_dataset_func()

    if owner_data_size + attacker_data_size > len(all_x):
        raise RuntimeError(
            "Whitebox Attack data error! Trying to consume more training data than there is available!"
            " {}>{}".format(owner_data_size + attacker_data_size, len(all_x)))

    # Assure owner data and attacker data are mutually exclusive!
    owner_data, owner_data_from_cache = augment_data(
        set_to_augment=(all_x[:owner_data_size], all_y[:owner_data_size]),
        prefix=dataset_label,
        total_size=total_owner_data_size,
        use_cached_training_data="owner_data" + str(total_owner_data_size) +
        str(total_attacker_data_size),
        verbose=verbose)
    attacker_data, attacker_data_from_cache = augment_data(
        set_to_augment=(all_x[owner_data_size:owner_data_size +
                              attacker_data_size],
                        all_y[owner_data_size:owner_data_size +
                              attacker_data_size]),
        prefix=dataset_label,
        total_size=total_attacker_data_size,
        use_cached_training_data="attacker_data" + str(total_owner_data_size) +
        str(total_attacker_data_size),
        verbose=verbose)

    # Make sure to always regenerate both files if necessary
    if owner_data_from_cache != attacker_data_from_cache:
        raise RuntimeError(
            "Whitebox Attack data error! Sets are not mutually exclusive, please delete conflicting "
            "file ending in '{}'!".format(
                str(total_owner_data_size) + str(total_attacker_data_size)))

    if verbose:
        print(
            "[2/6] Generating ASIACCS watermarked images: Train({}) Test({})".
            format(key_length, key_length_test))

    trigger = load_wm_images_asiaccs(type=wm_type,
                                     dataset=owner_data,
                                     wm_class=wm_class,
                                     n_size=key_length)

    trigger_test = load_wm_images_asiaccs(type=wm_type,
                                          dataset=test_data,
                                          wm_class=wm_class,
                                          n_size=key_length_test)

    print("(Debug) Asiaccs whitebox:")
    print("Owner: {}, Attacker: {}, test: {}, trigger: {}, trigger_test: {}".
          format(owner_data[0].mean(), attacker_data[0].mean(),
                 test_data[0].mean(), trigger[0].mean(),
                 trigger_test[0].mean()))
    if verbose:
        print("[3/6] Training the original model and embedding the watermark")

    additional_callbacks = [
        AdditionalValidationSets([(trigger_test[0], trigger_test[1],
                                   'watermark_new')]),
        ShowErrorsCallback(dataset=trigger, prefix="Embed Trigger (Train)"),
        ShowErrorsCallback(dataset=trigger_test, prefix="Embed Trigger (Test)")
    ]
    wm_model, history_embed, trigger = embed_wm(
        model=load_wm_model_func(),
        epochs=epochs_embed,
        key_length=key_length,
        train_data=owner_data,
        trigger_set=trigger,
        test_data=test_data,
        wm_boost_factor=1,
        batchsize=batchsize_embed,
        additional_callbacks=additional_callbacks,
        cache_embed_wm=cache_embed_wm,
        verbose=False)
    if verbose:
        print("    Evaluating accuracy on attacker data...",
              end="",
              flush=True)
        acc_on_attacker_data = wm_model.evaluate(attacker_data[0],
                                                 attacker_data[1])
        print("    Done! Accuracy and loss: {}".format(acc_on_attacker_data))
        print("[4/6] Labeling the attackers data with the original model")

    pred_y = wm_model.predict(attacker_data[0])
    attacker_data = attacker_data[0], pred_y
    attacker_data_reg = (attacker_data[0][:attacker_data_size_reg],
                         attacker_data[1][:attacker_data_size_reg])

    if verbose:
        print(
            "[5/6] Removing the watermark with the regularization attack.. {}".
            format(freeze_first_layers))

    additional_callbacks2 = [
        AdditionalValidationSets([(trigger_test[0], trigger_test[1],
                                   'watermark_new')]),
        ShowErrorsCallback(dataset=trigger, prefix="WB Trigger (Train)"),
        ShowErrorsCallback(dataset=trigger_test, prefix="WB Trigger (Test)")
    ]
    surr_model_reg, reg_history = whitebox_attack(
        wm_model=wm_model,
        load_model_func=load_wm_model_func,
        load_func_kwargs={"reg": reg_whitebox},
        load_func_kwargs2={
            "reg": reg_surr,
            "optimizer": RMSprop(lr=lr_surr),
            "freeze_first_layers": freeze_first_layers
        },
        trigger_set=trigger,
        train_data=attacker_data_reg,
        test_data=test_data,
        batchsize=batchsize_reg,
        additional_callbacks=additional_callbacks2,
        epochs_reg=epochs_reg,
        early_stopping_wm=early_stopping_wm_reg,  # When to stop
        patience=patience_reg,
        cache_surr_model=cache_reg_model,
        verbose=False)

    print("[6/6] Training the surrogate model")
    additional_callbacks_surr = [
        AdditionalValidationSets([(trigger_test[0], trigger_test[1],
                                   'watermark_new')]),
        ShowErrorsCallback(dataset=trigger, prefix="BB Trigger (Train)"),
        ShowErrorsCallback(dataset=trigger_test, prefix="BB Trigger (Test)")
    ]
    surr_model, history_surr = blackbox_attack(
        surrogate_model=surr_model_reg,
        epochs_surr=epochs_surr,
        train_data=attacker_data,
        trigger_set=trigger,
        test_data=test_data,
        batchsize=batchsize_surr,
        additional_callbacks=additional_callbacks_surr,
        cache_surr_model=cache_surr_model,
        verbose=False)

    return surr_model, (history_embed, reg_history, history_surr)
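
# concat_labels_if_not_none is another repo-local helper not shown here. A
# minimal sketch under the assumption that it appends the dataset label to
# every cache path that is set:

def concat_labels_if_not_none(cache_paths, dataset_label):
    # Keep None entries as None; suffix every other path with the label so
    # cached models and histories are stored per dataset.
    return [p + dataset_label if p is not None else None for p in cache_paths]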
Example No. 6
def countermark_blackbox(
        load_dataset_func,  # Function that loads the training and testing data
        model,  # Model specification for wm_embedding
        surrogate_model,  # Model specification for surrogate model training
        load_trigger_func,  # Function for loading the watermark set
        dataset_label="",  # Chosen label for the dataset (if caching is enabled)
        key_length=100,
        wm_boost_factor=100,
        owner_data_size=35000,
        total_owner_data_size=100000,
        attacker_data_size=15000,
        total_attacker_data_size=100000,
        epochs_embed=10,
        epochs_surr=20,
        batchsize_embed=64,
        batchsize_surr=64,
        cache_embed_wm=None,
        cache_surr_model=None,
        verbose=True):
    """ Generates a model that carries a COUNTERMARK watermark
        and a blackbox surrogate model that (hopefully) also carries the COUNTERMARK watermark
    """
    if verbose:
        print("[1/4] Fingerprint Blackbox Attack: Loading {} data".format(dataset_label))
        print("      Owner data: {} Attacker Data: {}".format(total_owner_data_size, total_attacker_data_size))

    cache_embed_wm, cache_surr_model, = concat_labels_if_not_none([cache_embed_wm, cache_surr_model], dataset_label)

    (all_x, all_y), test_data = load_dataset_func()

    if owner_data_size + attacker_data_size > len(all_x):
        raise RuntimeError("Blackbox Attack data error! Trying to consume more training data than there is available!"
                           " {}>{}".format(owner_data_size + attacker_data_size, len(all_x)))

    # Assure owner data and attacker data are mutually exclusive!
    owner_data, owner_data_from_cache = augment_data(
        set_to_augment=(all_x[:owner_data_size], all_y[:owner_data_size]),
        prefix=dataset_label,
        total_size=total_owner_data_size,
        use_cached_training_data="owner_data" + str(total_owner_data_size) + str(total_attacker_data_size),
        verbose=verbose)
    attacker_data, attacker_data_from_cache = augment_data(
        set_to_augment=(all_x[owner_data_size:owner_data_size + attacker_data_size],
                        all_y[owner_data_size:owner_data_size + attacker_data_size]),
        prefix=dataset_label,
        total_size=total_attacker_data_size,
        use_cached_training_data="attacker_data" + str(total_owner_data_size) + str(total_attacker_data_size),
        verbose=verbose)

    # Make sure to always regenerate both files if necessary
    if owner_data_from_cache != attacker_data_from_cache:
        raise RuntimeError("Blackbox Attack data error! Sets are not mutually exclusive, please delete conflicting "
                           "file ending in '{}'!".format(str(total_owner_data_size) + str(total_attacker_data_size)))

    if verbose:
        print("[2/4] Training the network with {} keys each repeated {} times)".format(key_length, wm_boost_factor))

    trigger = load_trigger_func()
    additional_callbacks = [ShowErrorsCallback(dataset=trigger, prefix="Embed Trigger")]
    wm_model, history_embed, trigger = embed_wm(model=model,
                                                epochs=epochs_embed,
                                                train_data=owner_data,
                                                trigger_set=trigger,
                                                test_data=test_data,
                                                key_length=key_length,
                                                wm_boost_factor=wm_boost_factor,
                                                batchsize=batchsize_embed,
                                                additional_callbacks=additional_callbacks,
                                                cache_embed_wm=cache_embed_wm,
                                                verbose=False)
    if verbose:
        print("    Evaluating accuracy on attacker data...", end="", flush=True)
        acc_on_attacker_data = wm_model.evaluate(attacker_data[0], attacker_data[1])
        print("    Done! Original discriminators accuracy on attackers data: {}".format(acc_on_attacker_data[1]))
        print("[3/4] Labeling the attackers data with the original model")

    pred_y = wm_model.predict(attacker_data[0])
    attacker_data = attacker_data[0], pred_y

    if verbose:
        print("[4/4] Training the surrogate model")

    additional_callbacks = [ShowErrorsCallback(dataset=trigger, prefix="BB Trigger")]
    surr_model, history_surr = blackbox_attack(surrogate_model=surrogate_model,
                                               epochs_surr=epochs_surr,
                                               trigger_set=trigger,
                                               train_data=attacker_data,
                                               test_data=test_data,
                                               batchsize=batchsize_surr,
                                               additional_callbacks=additional_callbacks,
                                               cache_surr_model=cache_surr_model,
                                               verbose=False)

    return surr_model, (history_embed, history_surr)
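
# embed_wm is a repo-local function that is not shown on this page. A minimal
# sketch of the embedding idea it presumably implements (oversample the trigger
# keys and train on them together with the owner data); the exact signature and
# caching behaviour are assumptions:
import numpy as np

def embed_wm_sketch(model, epochs, train_data, trigger_set, test_data,
                    key_length, wm_boost_factor, batchsize,
                    additional_callbacks, verbose):
    # Keep key_length trigger samples and repeat each one wm_boost_factor times.
    trig_x = np.repeat(trigger_set[0][:key_length], wm_boost_factor, axis=0)
    trig_y = np.repeat(trigger_set[1][:key_length], wm_boost_factor, axis=0)
    x = np.vstack([train_data[0], trig_x])
    y = np.vstack([train_data[1], trig_y])
    order = np.random.permutation(len(x))
    history = model.fit(x[order], y[order],
                        batch_size=batchsize,
                        epochs=epochs,
                        verbose=1 if verbose else 0,
                        validation_data=test_data,
                        callbacks=additional_callbacks)
    return model, history, (trigger_set[0][:key_length],
                            trigger_set[1][:key_length])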
def usenix_whitebox(
        load_dataset_func,  # Which dataset to choose. Should return training and testing data
        dataset_label,  # Label of the dataset (for caching)
        load_wm_model_func,  # Model specification for wm_embedding
        owner_data_size=35000,
        total_owner_data_size=100000,
        key_length=35,
        wm_boost_factor=1000,
        attacker_data_size=15000,
        attacker_data_size_reg=10000,
        total_attacker_data_size=15000,
        epochs_embed=10,
        epochs_reg=30,
        epochs_surr=10,
        early_stopping_wm_reg=0.1,  # At which watermark accuracy to stop the whitebox attack
        patience_reg=2,
        lr_surr=0.001,  # Learning rate for the surrogate model
        freeze_first_layers=0,  # How many layers to freeze for surrogate model
        reg_whitebox=0.0,
        reg_surr=0.0,
        batchsize_embed=64,
        batchsize_reg=64,
        batchsize_surr=64,
        cache_embed_wm=None,
        cache_reg_model=None,
        cache_surr_model=None,
        verbose=True):
    """ Generates two mutually exclusive data sets for the owner and the attacker. Trains a watermarked model for the
        owner with the ASIACCS embedding. Then runs a regularization and a surrogate model attack with the attackers
        data.
    """
    if verbose:
        print("[1/5] USENIX Whitebox Attack: Loading {} data".format(
            dataset_label))
        print("      Owner data: {} Attacker Data: {}".format(
            total_owner_data_size, total_attacker_data_size))
    cache_embed_wm, cache_reg_model, cache_surr_model, = concat_labels_if_not_none(
        [cache_embed_wm, cache_reg_model, cache_surr_model], dataset_label)
    (all_x, all_y), test_data = load_dataset_func()

    if owner_data_size + attacker_data_size > len(all_x):
        raise RuntimeError(
            "Whitebox Attack data error! Trying to consume more training data than there is available!"
            " {}>{}".format(owner_data_size + attacker_data_size, len(all_x)))

    # Assure owner data and attacker data are mutually exclusive!
    owner_data, owner_data_from_cache = augment_data(
        set_to_augment=(all_x[:owner_data_size], all_y[:owner_data_size]),
        prefix=dataset_label,
        total_size=total_owner_data_size,
        use_cached_training_data="owner_data" + str(total_owner_data_size) +
        str(total_attacker_data_size),
        verbose=verbose)
    attacker_data, attacker_data_from_cache = augment_data(
        set_to_augment=(all_x[owner_data_size:owner_data_size +
                              attacker_data_size],
                        all_y[owner_data_size:owner_data_size +
                              attacker_data_size]),
        prefix=dataset_label,
        total_size=total_attacker_data_size,
        use_cached_training_data="attacker_data" + str(total_owner_data_size) +
        str(total_attacker_data_size),
        verbose=verbose)

    # Make sure to always regenerate both files if necessary
    if owner_data_from_cache != attacker_data_from_cache:
        raise RuntimeError(
            "Whitebox Attack data error! Sets are not mutually exclusive, please delete conflicting "
            "file ending in '{}'!".format(
                str(total_owner_data_size) + str(total_attacker_data_size)))

    if verbose:
        print(
            "[2/5] Training the network with {} keys each repeated {} times)".
            format(key_length, wm_boost_factor))

    trigger = load_wm_images_usenix(imgsize=all_x[0].shape)
    additional_callbacks = [
        ShowErrorsCallback(dataset=trigger, prefix="Embed Trigger")
    ]
    wm_model, history_embed, trigger = embed_wm(
        model=load_wm_model_func(),
        epochs=epochs_embed,
        key_length=key_length,
        train_data=owner_data,
        trigger_set=trigger,
        test_data=test_data,
        wm_boost_factor=wm_boost_factor,
        batchsize=batchsize_embed,
        additional_callbacks=additional_callbacks,
        cache_embed_wm=cache_embed_wm,
        verbose=False)
    if verbose:
        print("    Evaluating accuracy on attacker data...",
              end="",
              flush=True)
        acc_on_attacker_data = wm_model.evaluate(attacker_data[0],
                                                 attacker_data[1])
        print(
            "    Done! Original discriminators accuracy on attackers data: {}".
            format(acc_on_attacker_data[1]))
        print("[3/5] Labeling the attackers data with the original model")

    pred_y = wm_model.predict(attacker_data[0])
    attacker_data = attacker_data[0], pred_y
    attacker_data_reg = (attacker_data[0][0:attacker_data_size_reg],
                         attacker_data[1][0:attacker_data_size_reg])

    if verbose:
        print(
            "[4/5] Removing the watermark with the regularization attack.. {}".
            format(freeze_first_layers))

    additional_callbacks = [
        ShowErrorsCallback(dataset=trigger, prefix="WB Trigger")
    ]
    surr_model_reg, history_reg = whitebox_attack(
        wm_model=wm_model,
        load_model_func=load_wm_model_func,
        load_func_kwargs={"reg": reg_whitebox},
        load_func_kwargs2={
            "reg": reg_surr,
            "optimizer": RMSprop(lr=lr_surr),
            "freeze_first_layers": freeze_first_layers
        },
        trigger_set=trigger,
        train_data=attacker_data_reg,
        test_data=test_data,
        batchsize=batchsize_reg,
        epochs_reg=epochs_reg,
        early_stopping_wm=early_stopping_wm_reg,  # When to stop
        patience=patience_reg,
        additional_callbacks=additional_callbacks,
        cache_surr_model=cache_reg_model,
        verbose=False)

    if verbose:
        print("[5/5] Training the surrogate model")

    additional_callbacks = [
        ShowErrorsCallback(dataset=trigger, prefix="BB Trigger")
    ]
    surr_model, history_surr = blackbox_attack(
        surrogate_model=surr_model_reg,
        epochs_surr=epochs_surr,
        train_data=attacker_data,
        trigger_set=trigger,
        test_data=test_data,
        batchsize=batchsize_surr,
        additional_callbacks=additional_callbacks,
        cache_surr_model=cache_surr_model,
        verbose=False)

    return surr_model, (history_embed, history_reg, history_surr)
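
# ShowErrorsCallback is a repo-local Keras callback used throughout these
# attacks but not shown on this page. A hedged sketch of its presumed role
# (report how many trigger keys the model currently gets wrong after each
# epoch); the "Sketch" suffix marks the class as an assumption:
import numpy as np
import keras

class ShowErrorsCallbackSketch(keras.callbacks.Callback):
    def __init__(self, dataset, prefix=""):
        super().__init__()
        self.x, self.y = dataset
        self.prefix = prefix

    def on_epoch_end(self, epoch, logs=None):
        preds = np.argmax(self.model.predict(self.x), axis=1)
        labels = np.argmax(self.y, axis=1)
        errors = int(np.sum(preds != labels))
        print("{}: {}/{} keys misclassified".format(self.prefix, errors,
                                                    len(labels)))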