def _blackmarks_embed(model,
                      x_train,
                      y_train,
                      x_test,
                      y_test,
                      sess,
                      eps=0.25,
                      batch_size=128,
                      epochs=5,
                      min_delta=0.002,  # Minimal improvement per step
                      patience=2,
                      wm_epochs=5,
                      fine_tuning=True,
                      load_wm_model_func=None,
                      weight_path='../../tmp/mnist_cnn_weights.hdf5',
                      retrain=True):
    # Note: `sign` (the watermark bit string) and `num_classes` are expected to
    # be defined at module level.
    if retrain:
        try:
            model.load_weights(weight_path)
        except Exception as e:
            print(e)
            print('Cannot find pretrained weights. Start training...')
            checkpoint = ModelCheckpoint(weight_path,
                                         monitor='val_loss',
                                         verbose=1,
                                         save_best_only=True,
                                         mode='min',
                                         save_weights_only=True)
            model.fit(x_train, y_train,
                      batch_size=batch_size,
                      epochs=epochs,
                      verbose=1,
                      validation_data=(x_test, y_test),
                      callbacks=[checkpoint])

    # Step 1: Get the key length and use 100 * key_length training samples to
    # cluster the penultimate-layer activations into two groups.
    key_length = len(list(sign))
    x_train = x_train[:key_length * 100]
    functor = K.function([model.input, K.learning_phase()],
                         [model.layers[-2].output])
    activation_out = functor([x_train, 1.])[0]
    activation_out = np.mean(activation_out, axis=0)
    activation_out = activation_out.reshape(-1, 1)
    kmeans = KMeans(n_clusters=2).fit(activation_out)
    clusters = kmeans.labels_
    cluster_one = np.where(clusters == 1)[0]
    cluster_zero = np.where(clusters == 0)[0]
    print(clusters)
    print(cluster_one)
    print(cluster_zero)

    # Step 2: Classify the training inputs into clusters and assign each one a
    # target adversarial class label drawn from the other cluster.
    preds_op = model(model.inputs[0])
    preds_one_hot = sess.run(preds_op, feed_dict={model.inputs[0]: x_train})
    preds = np.argmax(preds_one_hot, axis=1)
    preds_cluster = np.isin(preds, cluster_one).astype('int')
    preds_target = [
        uniform_select(cluster_one) if i == 0 else uniform_select(cluster_zero)
        for i in list(preds_cluster)
    ]
    print(preds_target)
    preds_target_one_hot = keras.utils.to_categorical(preds_target, num_classes)

    # Step 3: Generate adversarial examples towards the target labels.
    wrap = KerasModelWrapper(model)
    fgsm = FastGradientMethod(wrap)
    fgsm_params = {'eps': eps, 'y_target': preds_target_one_hot}
    adv_x_op = fgsm.generate(model.inputs[0], **fgsm_params)
    adv_x = sess.run(adv_x_op, feed_dict={model.inputs[0]: x_train})

    history_wm = AdditionalValidationSets([(adv_x, preds_one_hot, 'watermark')])
    time_hist = TimeHistory()
    es = EarlyStopping(monitor='acc',
                       mode='max',
                       min_delta=min_delta,  # require at least `min_delta` improvement per epoch
                       patience=patience,
                       restore_best_weights=True)
    additional_callbacks = [
        ShowErrorsCallback(dataset=(adv_x, preds_one_hot),
                           prefix="Embed Trigger (Train)")
    ]
    callbacks = []
    if fine_tuning:
        callbacks = [time_hist, *additional_callbacks, history_wm, es]

    # Step 4: Fine-tune the model on the adversarial keys (labeled with their
    # original, clean predictions) to embed the watermark.
    model.fit(adv_x, preds_one_hot,
              batch_size=batch_size,
              epochs=wm_epochs,
              verbose=1,
              validation_data=(x_test, y_test),
              callbacks=callbacks)
    # print("Fine tuning finished, start retraining...")
    # x_train = np.vstack((x_train, adv_x))
    # y_train = np.vstack((y_train, preds_one_hot))
    # order = np.arange(x_train.shape[0])
    # np.random.shuffle(order)
    # x_train = x_train[order]
    # y_train = y_train[order]
    # model.fit(x_train, y_train,
    #           batch_size=batch_size,
    #           epochs=wm_epochs,
    #           verbose=1,
    #           validation_data=(x_test, y_test),
    #           callbacks=[time_hist, *additional_callbacks, history_wm, es])
    history_wm.history = merge_histories(
        [history_wm, time_hist, *additional_callbacks, es])

    # Step 5: Filter the keys. Keep only keys that the non-watermarked model
    # misclassifies but that the watermarked model still maps to the original
    # (clean) label.
    model_no_wm = load_wm_model_func()
    model_no_wm.load_weights(weight_path)
    pred_no_wm_one_hot_op = model_no_wm(model.inputs[0])
    pred_wm_one_hot_op = model(model.inputs[0])
    pred_no_wm_one_hot, pred_wm_one_hot = sess.run(
        [pred_no_wm_one_hot_op, pred_wm_one_hot_op],
        feed_dict={model.inputs[0]: adv_x})
    pred_no_wm = np.argmax(pred_no_wm_one_hot, axis=1)
    pred_wm = np.argmax(pred_wm_one_hot, axis=1)
    key_candidate_cond1 = np.where(pred_no_wm != preds)[0]
    key_candidate_cond2 = np.where(pred_wm == preds)[0]
    key_candidate = np.intersect1d(key_candidate_cond1, key_candidate_cond2)
    print(key_candidate)
    wm_keys = adv_x[key_candidate]
    wm_keys_cluster = preds_cluster[key_candidate]
    wm_keys_one = np.where(wm_keys_cluster == 1)[0]
    wm_keys_zero = np.where(wm_keys_cluster == 0)[0]
    print(wm_keys_one[:np.sum(sign)])
    print(wm_keys_zero[:key_length - np.sum(sign)])

    # Assemble the key set in signature order: a key from cluster one for every
    # 1-bit and a key from cluster zero for every 0-bit.
    acc1 = 0
    acc2 = 0
    embeded_keys = []
    for i in list(sign):
        if i == 1:
            embeded_keys.append(wm_keys[wm_keys_one[acc1]])
            acc1 = acc1 + 1
        else:
            embeded_keys.append(wm_keys[wm_keys_zero[acc2]])
            acc2 = acc2 + 1
    embeded_keys = np.array(embeded_keys)
    print("#########HERE##########")
    print(embeded_keys.shape)
    print("#######################")
    cluster = (cluster_zero, cluster_one)
    trigger = {}
    trigger["keys"] = (embeded_keys,
                       keras.utils.to_categorical(sign, num_classes))
    trigger["clusters"] = (cluster_zero, cluster_one, sign)
    # need to change embeded_keys to history
    return model, history_wm, trigger

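# --- Illustrative sketches (not part of the original module) -----------------
# Two hedged helpers to clarify how _blackmarks_embed is used. `uniform_select`
# is assumed to draw one class index uniformly at random from a cluster (the
# real helper is defined elsewhere in the repository and may differ), and
# `_decode_signature_sketch` shows one plausible way the embedded bit string
# could be read back from a suspect model using the `trigger` dict returned
# above. Module-level imports (np, keras) are assumed.

def _uniform_select_sketch(cluster_indices):
    """Assumed behaviour of uniform_select: pick one class index from a cluster."""
    return int(np.random.choice(cluster_indices))


def _decode_signature_sketch(suspect_model, trigger):
    """Decode the watermark bits from a suspect model (illustration only)."""
    keys, _ = trigger["keys"]
    cluster_zero, cluster_one, sign = trigger["clusters"]
    preds = np.argmax(suspect_model.predict(keys), axis=1)
    decoded = np.isin(preds, cluster_one).astype(int)  # classes in cluster one -> bit 1
    bit_error_rate = float(np.mean(decoded != np.asarray(sign)))
    return decoded, bit_error_rate
# -----------------------------------------------------------------------------
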
def adversarial_whitebox(
        load_dataset_func,  # Which dataset to choose. Should return training and testing data
        dataset_label,  # Label of the dataset (for caching)
        load_wm_model_func,  # Model for wm_embedding (needs params {"reg","optimizer","freeze_first_layers"})
        wm_embed_func,
        owner_data_size=30000,
        total_owner_data_size=30000,
        attacker_data_size=15000,
        attacker_data_size_reg=10000,
        total_attacker_data_size=15000,
        epochs_reg=30,
        epochs_surr=10,
        early_stopping_wm_reg=0.2,  # At which watermark accuracy to stop the whitebox attack
        patience_reg=2,
        lr_surr=0.001,  # Learning rate for the surrogate model
        freeze_first_layers=0,  # How many layers to freeze for the surrogate model
        reg_whitebox=0.003,
        reg_surr=0.0,
        batchsize_embed=64,
        batchsize_reg=64,
        batchsize_surr=64,
        wm_class=5,
        cache_embed_wm=None,
        cache_reg_model=None,
        cache_surr_model=None,
        verbose=False,
        fine_tuning=True,
        weight_path='../../tmp/mnist_cnn_weights.hdf5',
        cluster=False):
    sess = tf.Session()
    K.set_session(sess)

    cache_embed_wm, cache_reg_model, cache_surr_model = concat_labels_if_not_none(
        [cache_embed_wm, cache_reg_model, cache_surr_model], dataset_label)

    (all_x, all_y), test_data = load_dataset_func()

    if owner_data_size + attacker_data_size > len(all_x):
        raise RuntimeError(
            "Whitebox Attack data error! Trying to consume more training data than there is available!"
            " {}>{}".format(owner_data_size + attacker_data_size, len(all_x)))

    owner_data, owner_data_from_cache = augment_data(
        set_to_augment=(all_x[:owner_data_size], all_y[:owner_data_size]),
        prefix=dataset_label,
        total_size=total_owner_data_size,
        use_cached_training_data="owner_data" + str(total_owner_data_size) +
        str(total_attacker_data_size),
        verbose=verbose)
    attacker_data, attacker_data_from_cache = augment_data(
        set_to_augment=(all_x[owner_data_size:owner_data_size + attacker_data_size],
                        all_y[owner_data_size:owner_data_size + attacker_data_size]),
        prefix=dataset_label,
        total_size=total_attacker_data_size,
        use_cached_training_data="attacker_data" + str(total_owner_data_size) +
        str(total_attacker_data_size),
        verbose=verbose)

    # Make sure to always regenerate both files if necessary
    if owner_data_from_cache != attacker_data_from_cache:
        raise RuntimeError(
            "Whitebox Attack data error! Sets are not mutually exclusive, please delete the conflicting "
            "file ending in '{}'!".format(
                str(total_owner_data_size) + str(total_attacker_data_size)))

    wm_model, history_embed, trigger = wm_embed_func(
        load_wm_model_func(),
        owner_data[0],
        owner_data[1],
        test_data[0],
        test_data[1],
        sess,
        fine_tuning=fine_tuning,
        load_wm_model_func=load_wm_model_func)

    pred_y = wm_model.predict(attacker_data[0])
    attacker_data = attacker_data[0], pred_y
    attacker_data_reg = (attacker_data[0][:attacker_data_size_reg],
                         attacker_data[1][:attacker_data_size_reg])

    additional_callbacks2 = [
        ShowErrorsCallback(dataset=trigger["keys"], prefix="WB Trigger")
    ]
    surr_model_reg, reg_history = whitebox_attack(
        wm_model=wm_model,
        load_model_func=load_wm_model_func,
        load_func_kwargs={"reg": reg_whitebox},
        load_func_kwargs2={
            "reg": reg_surr,
            "optimizer": RMSprop(lr=lr_surr),
            "freeze_first_layers": freeze_first_layers
        },
        trigger_set=trigger,
        train_data=attacker_data_reg,
        test_data=test_data,
        batchsize=batchsize_reg,
        epochs_reg=epochs_reg,
        additional_callbacks=additional_callbacks2,
        early_stopping_wm=early_stopping_wm_reg,  # When to stop
        patience=patience_reg,
        cache_surr_model=cache_reg_model,
        verbose=False,
        cluster=cluster)

    additional_callbacks_surr = [
        ShowErrorsCallback(dataset=trigger["keys"], prefix="BB Trigger (Train)")
    ]
    # Randomized blackbox: uncomment the block below to randomly re-label a
    # small fraction of the attacker data before the surrogate attack.
    # random_selection = np.random.random_sample(attacker_data_size)
    # random_selection = (random_selection < 0.005).astype('int64')
    # random_target = np.random.randint(10, size=sum(random_selection))
    # random_index = np.where(random_selection == 1)[0]
    # attacker_data[1][random_index] = keras.utils.to_categorical(
    #     random_target, num_classes)
    # print(sum(random_selection), " attacker data points were twisted...")
    surr_model, history_surr = blackbox_attack(
        surrogate_model=surr_model_reg,
        epochs_surr=epochs_surr,
        train_data=attacker_data,
        trigger_set=trigger,
        test_data=test_data,
        batchsize=batchsize_surr,
        additional_callbacks=additional_callbacks_surr,
        cache_surr_model=cache_surr_model,
        verbose=False,
        cluster=cluster)

    # After the black-box attack, try to embed the watermark again to further
    # reduce the old watermark's retention.
    print("####################################################")
    print("Watermark retention BEFORE embedding the new watermark...")
    print(surr_model.evaluate(trigger["keys"][0], trigger["keys"][1]))
    print(surr_model.evaluate(test_data[0], test_data[1]))
    print("####################################################")
    surr_model, history_embed, _ = wm_embed_func(
        surr_model,
        attacker_data[0],
        attacker_data[1],
        test_data[0],
        test_data[1],
        sess,
        fine_tuning=fine_tuning,
        load_wm_model_func=load_wm_model_func,
        retrain=False)
    print("####################################################")
    print("Watermark retention AFTER embedding the new watermark...")
    print(surr_model.evaluate(trigger["keys"][0], trigger["keys"][1]))
    print(surr_model.evaluate(test_data[0], test_data[1]))
    print("####################################################")

    baseline_model1 = load_wm_model_func()
    baseline_model1.load_weights(weight_path)
    baseline_model2 = load_wm_model_func()
    baseline_model2.fit(attacker_data[0],
                        attacker_data[1],
                        batch_size=64,
                        epochs=5,
                        verbose=1,
                        validation_data=(test_data[0], test_data[1]))
    # evaluate() returns [loss, acc]; the trigger-set loss is used as the baseline here.
    baseline_eval1 = baseline_model1.evaluate(trigger["keys"][0],
                                              trigger["keys"][1])[0]
    baseline_eval2 = baseline_model2.evaluate(trigger["keys"][0],
                                              trigger["keys"][1])[0]
    print("This is the baseline:", baseline_eval1)
    print("This is the baseline:", baseline_eval2)
    print(baseline_model1.evaluate(owner_data[0], owner_data[1]))
    baseline = (baseline_eval1 / 100, baseline_eval2 / 100)
    return surr_model, (history_embed, reg_history, history_surr, baseline)

def adversarial_blackbox(
        load_dataset_func,  # Which dataset to choose. Should return training and testing data
        dataset_label,  # Label of the dataset (for caching)
        load_wm_model_func,  # Model specification for the owner's model
        wm_embed_func,  # Watermark embedding function
        owner_data_size=35000,  # Data to load from the repository
        total_owner_data_size=35000,  # Total data (with augmentation)
        attacker_data_size=25000,  # Data to load from the repository
        total_attacker_data_size=25000,  # Total data (with augmentation)
        falsify_attacker_data=0.05,  # Ratio of labels to re-label randomly
        epochs_wm=5,  # Max number of epochs for the owner's model
        batchsize_wm=64,  # Batch size for the owner's model
        epochs_surr=20,  # Max number of epochs for the blackbox attack model
        batchsize_surr=64,  # Batch size for the blackbox attack
        cache_surr_model=None,  # Whether to save the model (path required)
        weight_path='../../tmp/mnist_cnn_weights.hdf5',
        fine_tuning=True,
        cluster=False,
        rand_bb=False,
        verbose=False):
    """ Blackbox attack on adversarial embedding """
    sess = tf.Session()
    K.set_session(sess)

    # Load the owner's model specification for the surrogate
    surrogate_model = load_wm_model_func()

    (all_x, all_y), test_data = load_dataset_func()

    if owner_data_size + attacker_data_size > len(all_x):
        raise RuntimeError(
            "Blackbox Attack data error! Trying to consume more training data than there is available!"
            " {}>{}".format(owner_data_size + attacker_data_size, len(all_x)))

    # Load owner and attacker data and assure they are mutually exclusive!
    owner_data, loaded_owner_from_cache = augment_data(
        set_to_augment=(all_x[:owner_data_size], all_y[:owner_data_size]),
        prefix=dataset_label,
        total_size=total_owner_data_size,
        use_cached_training_data="owner_data" + str(total_owner_data_size) +
        str(total_attacker_data_size),
        verbose=verbose)
    attacker_data, loaded_attacker_from_cache = augment_data(
        set_to_augment=(all_x[owner_data_size:owner_data_size + attacker_data_size],
                        all_y[owner_data_size:owner_data_size + attacker_data_size]),
        prefix=dataset_label,
        total_size=total_attacker_data_size,
        use_cached_training_data="attacker_data" + str(total_owner_data_size) +
        str(total_attacker_data_size),
        verbose=verbose)

    if loaded_owner_from_cache != loaded_attacker_from_cache:
        raise RuntimeError(
            "Blackbox Attack data error! One set was loaded from cache and the other wasn't. Cannot ensure "
            "that the sets don't overlap. Please delete the conflicting file ending in '{}'!".format(
                str(total_owner_data_size) + str(total_attacker_data_size)))

    # Create the owner's model with the embedded watermark
    wm_model, history_embed, trigger = wm_embed_func(
        load_wm_model_func(),
        owner_data[0],
        owner_data[1],
        test_data[0],
        test_data[1],
        sess,
        fine_tuning=fine_tuning,
        load_wm_model_func=load_wm_model_func)

    # Label the attacker's data with the watermarked model
    pred_y = wm_model.predict(attacker_data[0])
    attacker_data = attacker_data[0], pred_y

    additional_callbacks = [
        ShowErrorsCallback(dataset=trigger["keys"], prefix="BB Trigger")
    ]

    # Give a `falsify_attacker_data` fraction of the attacker's labels a random false value
    random_selection = np.random.random_sample(attacker_data_size)
    random_selection = (random_selection < falsify_attacker_data).astype('int64')
    random_target = np.random.randint(10, size=sum(random_selection))
    random_index = np.where(random_selection == 1)[0]
    attacker_data[1][random_index] = keras.utils.to_categorical(
        random_target, num_classes)

    print("##############################################")
    print("########### Starting Blackbox Attack #########")
    # Start the blackbox attack
    surr_model, history_surr = blackbox_attack(
        surrogate_model=surrogate_model,
        epochs_surr=epochs_surr,
        trigger_set=trigger,
        train_data=MNISTSequence(attacker_data[0], attacker_data[1],
                                 batchsize_surr) if rand_bb else attacker_data,
        test_data=test_data,
        batchsize=batchsize_surr,
        additional_callbacks=additional_callbacks,
        cache_surr_model=cache_surr_model,
        verbose=False,
        cluster=cluster)

    # After the black-box attack, try to embed the watermark again to further
    # reduce the old watermark's retention.
    print("####################################################")
    print("Watermark retention BEFORE embedding the new watermark...")
    print(surr_model.evaluate(trigger["keys"][0], trigger["keys"][1]))
    print(surr_model.evaluate(test_data[0], test_data[1]))
    print("####################################################")
    surr_model, history_embed, _ = wm_embed_func(
        surr_model,
        attacker_data[0],
        attacker_data[1],
        test_data[0],
        test_data[1],
        sess,
        fine_tuning=fine_tuning,
        load_wm_model_func=load_wm_model_func,
        retrain=False)
    print("####################################################")
    print("Watermark retention AFTER embedding the new watermark...")
    print(surr_model.evaluate(trigger["keys"][0], trigger["keys"][1]))
    print(surr_model.evaluate(test_data[0], test_data[1]))
    print("####################################################")

    baseline_model1 = wm_model
    baseline_model2 = load_wm_model_func()
    baseline_model2.fit(attacker_data[0],
                        attacker_data[1],
                        batch_size=64,
                        epochs=5,
                        verbose=1,
                        validation_data=(test_data[0], test_data[1]))
    # evaluate() returns [loss, acc]; the trigger-set accuracy is used as the baseline here.
    baseline_eval1 = baseline_model1.evaluate(trigger["keys"][0],
                                              trigger["keys"][1])[1]
    baseline_eval2 = baseline_model2.evaluate(trigger["keys"][0],
                                              trigger["keys"][1])[1]
    baseline = (baseline_eval1 * 100, baseline_eval2 * 100)
    return surr_model, (history_embed, history_surr, baseline)

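# Hedged sketch of the surrogate-model (blackbox) attack that blackbox_attack is
# assumed to implement at its core: query the watermarked model for labels on
# the attacker's inputs and train a fresh model on those predictions. Callbacks,
# caching and early stopping are omitted; all names here are illustrative only.
def _blackbox_attack_sketch(wm_model, surrogate_model, x_attacker, test_data,
                            epochs=20, batchsize=64):
    y_soft = wm_model.predict(x_attacker)  # soft labels from the victim model
    surrogate_model.fit(x_attacker, y_soft,
                        batch_size=batchsize, epochs=epochs, verbose=1,
                        validation_data=test_data)
    return surrogate_model
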
def asiaccs_blackbox(
        load_dataset_func,  # Which dataset to choose. Should return training and testing data
        dataset_label,  # Label of the dataset (for caching)
        model,  # Model specification for wm_embedding
        surrogate_model,  # Model for the blackbox attack
        wm_type='gaussian',  # 'logo' or 'gaussian' for the wm embedding
        owner_data_size=25000,  # Data size of the owner
        total_owner_data_size=100000,  # Total data size of the owner with augmentation
        attacker_data_size=25000,  # Data size of the attacker
        total_attacker_data_size=100000,  # Total data size of the attacker with augmentation
        key_length=10000,  # How many keys to use for the embedding
        key_length_test=1000,  # How many keys to use for testing
        epochs_embed=7,  # Train the owner's model
        epochs_surr=20,  # Train the attacker's model
        batchsize_embed=64,
        batchsize_surr=64,
        wm_class=5,
        cache_embed_wm=None,  # Filepath to store the owner's model & history
        cache_surr_model=None,  # Filepath to store the attacker's model & history
        verbose=True):
    """ Generates a watermarked surrogate model with the ASIACCS watermarking scheme
    """
    if verbose:
        print("[1/5] ({}) Blackbox Attack: Loading {} data".format(
            wm_type, dataset_label))
        print(" Owner data: {} Attacker Data: {}".format(
            total_owner_data_size, total_attacker_data_size))

    cache_embed_wm, cache_surr_model = concat_labels_if_not_none(
        [cache_embed_wm, cache_surr_model], dataset_label)

    (all_x, all_y), test_data = load_dataset_func()

    if owner_data_size + attacker_data_size > len(all_x):
        raise RuntimeError(
            "Blackbox Attack data error! Trying to consume more training data than there is available!"
            " {}>{}".format(owner_data_size + attacker_data_size, len(all_x)))

    # Assure owner data and attacker data are mutually exclusive!
    owner_data, owner_data_from_cache = augment_data(
        set_to_augment=(all_x[:owner_data_size], all_y[:owner_data_size]),
        prefix=dataset_label,
        total_size=total_owner_data_size,
        use_cached_training_data="owner_data" + str(total_owner_data_size) +
        str(total_attacker_data_size),
        verbose=verbose)
    attacker_data, attacker_data_from_cache = augment_data(
        set_to_augment=(all_x[owner_data_size:owner_data_size + attacker_data_size],
                        all_y[owner_data_size:owner_data_size + attacker_data_size]),
        prefix=dataset_label,
        total_size=total_attacker_data_size,
        use_cached_training_data="attacker_data" + str(total_owner_data_size) +
        str(total_attacker_data_size),
        verbose=verbose)

    # Make sure to always regenerate both files if necessary
    if owner_data_from_cache != attacker_data_from_cache:
        raise RuntimeError(
            "Blackbox Attack data error! Sets are not mutually exclusive, please delete the conflicting "
            "file ending in '{}'!".format(
                str(total_owner_data_size) + str(total_attacker_data_size)))

    if verbose:
        print("[2/5] Generating ASIACCS watermarked images: Train({}) Test({})".
              format(key_length, key_length_test))
    trigger = load_wm_images_asiaccs(type=wm_type,
                                     dataset=owner_data,
                                     wm_class=wm_class,
                                     n_size=key_length)
    trigger_test = load_wm_images_asiaccs(type=wm_type,
                                          dataset=test_data,
                                          wm_class=wm_class,
                                          n_size=key_length_test)

    if verbose:
        print("[3/5] Training the original model and embedding the watermark")
    additional_callbacks = [
        AdditionalValidationSets([(trigger_test[0], trigger_test[1],
                                   'watermark_new')]),
        ShowErrorsCallback(dataset=trigger, prefix="Embed Trigger (Train)"),
        ShowErrorsCallback(dataset=trigger_test, prefix="Embed Trigger (Test)")
    ]
    wm_model, history_embed, trigger = embed_wm(
        model=model,
        epochs=epochs_embed,
        key_length=key_length,
        train_data=owner_data,
        trigger_set=trigger,
        test_data=test_data,
        wm_boost_factor=1,
        batchsize=batchsize_embed,
        additional_callbacks=additional_callbacks,
        cache_embed_wm=cache_embed_wm,
        verbose=False)

    if verbose:
        print(" Evaluating accuracy on attacker data...", end="", flush=True)
        acc_on_attacker_data = wm_model.evaluate(attacker_data[0],
                                                 attacker_data[1])
        print(" Done! Original discriminator's accuracy on the attacker's data: {}".format(
            acc_on_attacker_data[1]))
        print("[4/5] Labeling the attacker's data with the original model")
    pred_y = wm_model.predict(attacker_data[0])
    attacker_data = attacker_data[0], pred_y

    print("[5/5] Training the surrogate model")
    additional_callbacks_surr = [
        AdditionalValidationSets([(trigger_test[0], trigger_test[1],
                                   'watermark_new')]),
        ShowErrorsCallback(dataset=trigger, prefix="BB Trigger (Train)"),
        ShowErrorsCallback(dataset=trigger_test, prefix="BB Trigger (Test)")
    ]
    surr_model, history_surr = blackbox_attack(
        surrogate_model=surrogate_model,
        epochs_surr=epochs_surr,
        train_data=attacker_data,
        trigger_set=trigger,
        test_data=test_data,
        batchsize=batchsize_surr,
        additional_callbacks=additional_callbacks_surr,
        cache_surr_model=cache_surr_model,
        verbose=False)

    return surr_model, (history_embed, history_surr)

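# Hedged illustration of an ASIACCS-style content watermark, assuming the
# 'gaussian' variant: n_size training images are blended with a noise pattern
# and relabeled to wm_class. The repository's load_wm_images_asiaccs is the
# real trigger builder (it also supports a 'logo' variant); this sketch only
# conveys the idea, assumes inputs scaled to [0, 1], and uses illustrative names.
def _asiaccs_trigger_sketch(dataset, wm_class, n_size, strength=0.25):
    x, _ = dataset
    keys = x[:n_size].copy()
    noise = np.random.normal(0.0, 1.0, size=keys.shape)
    keys = np.clip(keys + strength * noise, 0.0, 1.0)  # blend noise into the images
    labels = keras.utils.to_categorical(np.full(n_size, wm_class), num_classes)
    return keys, labels
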
def asiaccs_whitebox(
        load_dataset_func,  # Which dataset to choose. Should return training and testing data
        dataset_label,  # Label of the dataset (for caching)
        load_wm_model_func,  # Model for wm_embedding (needs params {"reg","optimizer","freeze_first_layers"})
        wm_type='gaussian',  # 'logo' or 'gaussian' for the wm embedding
        owner_data_size=35000,
        total_owner_data_size=100000,
        key_length=10000,
        key_length_test=1000,
        attacker_data_size=15000,
        attacker_data_size_reg=10000,
        total_attacker_data_size=15000,
        epochs_embed=1,
        epochs_reg=1,
        epochs_surr=1,
        early_stopping_wm_reg=0.1,  # At which watermark accuracy to stop the whitebox attack
        patience_reg=0,
        lr_surr=0.001,  # Learning rate for the surrogate model
        freeze_first_layers=0,  # How many layers to freeze for the surrogate model
        reg_whitebox=0.0,
        reg_surr=0.0,
        batchsize_embed=64,
        batchsize_reg=64,
        batchsize_surr=64,
        wm_class=5,
        cache_embed_wm=None,
        cache_reg_model=None,
        cache_surr_model=None,
        verbose=True):
    """ Generates two mutually exclusive data sets for the owner and the attacker.
    Trains a watermarked model for the owner with the ASIACCS embedding. Then runs
    a regularization and a surrogate model attack with the attacker's data.
    """
    if verbose:
        print("[1/6] ASIACCS ({}) Whitebox Attack: Loading {} data".format(
            wm_type, dataset_label))
        print(" Owner data: {} Attacker Data: {}".format(
            total_owner_data_size, total_attacker_data_size))

    cache_embed_wm, cache_reg_model, cache_surr_model = concat_labels_if_not_none(
        [cache_embed_wm, cache_reg_model, cache_surr_model], dataset_label)

    (all_x, all_y), test_data = load_dataset_func()

    if owner_data_size + attacker_data_size > len(all_x):
        raise RuntimeError(
            "Whitebox Attack data error! Trying to consume more training data than there is available!"
            " {}>{}".format(owner_data_size + attacker_data_size, len(all_x)))

    # Assure owner data and attacker data are mutually exclusive!
    owner_data, owner_data_from_cache = augment_data(
        set_to_augment=(all_x[:owner_data_size], all_y[:owner_data_size]),
        prefix=dataset_label,
        total_size=total_owner_data_size,
        use_cached_training_data="owner_data" + str(total_owner_data_size) +
        str(total_attacker_data_size),
        verbose=verbose)
    attacker_data, attacker_data_from_cache = augment_data(
        set_to_augment=(all_x[owner_data_size:owner_data_size + attacker_data_size],
                        all_y[owner_data_size:owner_data_size + attacker_data_size]),
        prefix=dataset_label,
        total_size=total_attacker_data_size,
        use_cached_training_data="attacker_data" + str(total_owner_data_size) +
        str(total_attacker_data_size),
        verbose=verbose)

    # Make sure to always regenerate both files if necessary
    if owner_data_from_cache != attacker_data_from_cache:
        raise RuntimeError(
            "Whitebox Attack data error! Sets are not mutually exclusive, please delete the conflicting "
            "file ending in '{}'!".format(
                str(total_owner_data_size) + str(total_attacker_data_size)))

    if verbose:
        print("[2/6] Generating ASIACCS watermarked images: Train({}) Test({})".
              format(key_length, key_length_test))
    trigger = load_wm_images_asiaccs(type=wm_type,
                                     dataset=owner_data,
                                     wm_class=wm_class,
                                     n_size=key_length)
    trigger_test = load_wm_images_asiaccs(type=wm_type,
                                          dataset=test_data,
                                          wm_class=wm_class,
                                          n_size=key_length_test)

    print("(Debug) Asiaccs whitebox:")
    print("Owner: {}, Attacker: {}, test: {}, trigger: {}, trigger_test: {}".
          format(owner_data[0].mean(), attacker_data[0].mean(),
                 test_data[0].mean(), trigger[0].mean(),
                 trigger_test[0].mean()))

    if verbose:
        print("[3/6] Training the original model and embedding the watermark")
    additional_callbacks = [
        AdditionalValidationSets([(trigger_test[0], trigger_test[1],
                                   'watermark_new')]),
        ShowErrorsCallback(dataset=trigger, prefix="Embed Trigger (Train)"),
        ShowErrorsCallback(dataset=trigger_test, prefix="Embed Trigger (Test)")
    ]
    wm_model, history_embed, trigger = embed_wm(
        model=load_wm_model_func(),
        epochs=epochs_embed,
        key_length=key_length,
        train_data=owner_data,
        trigger_set=trigger,
        test_data=test_data,
        wm_boost_factor=1,
        batchsize=batchsize_embed,
        additional_callbacks=additional_callbacks,
        cache_embed_wm=cache_embed_wm,
        verbose=False)

    if verbose:
        print(" Evaluating accuracy on attacker data...", end="", flush=True)
        acc_on_attacker_data = wm_model.evaluate(attacker_data[0],
                                                 attacker_data[1])
        print(" Done! Loss and accuracy: {}".format(acc_on_attacker_data))
        print("[4/6] Labeling the attacker's data with the original model")
    pred_y = wm_model.predict(attacker_data[0])
    attacker_data = attacker_data[0], pred_y
    attacker_data_reg = (attacker_data[0][:attacker_data_size_reg],
                         attacker_data[1][:attacker_data_size_reg])

    if verbose:
        print("[5/6] Removing the watermark with the regularization attack (freeze_first_layers={})".
              format(freeze_first_layers))
    additional_callbacks2 = [
        AdditionalValidationSets([(trigger_test[0], trigger_test[1],
                                   'watermark_new')]),
        ShowErrorsCallback(dataset=trigger, prefix="WB Trigger (Train)"),
        ShowErrorsCallback(dataset=trigger_test, prefix="WB Trigger (Test)")
    ]
    surr_model_reg, reg_history = whitebox_attack(
        wm_model=wm_model,
        load_model_func=load_wm_model_func,
        load_func_kwargs={"reg": reg_whitebox},
        load_func_kwargs2={
            "reg": reg_surr,
            "optimizer": RMSprop(lr=lr_surr),
            "freeze_first_layers": freeze_first_layers
        },
        trigger_set=trigger,
        train_data=attacker_data_reg,
        test_data=test_data,
        batchsize=batchsize_reg,
        additional_callbacks=additional_callbacks2,
        epochs_reg=epochs_reg,
        early_stopping_wm=early_stopping_wm_reg,  # When to stop
        patience=patience_reg,
        cache_surr_model=cache_reg_model,
        verbose=False)

    print("[6/6] Training the surrogate model")
    additional_callbacks_surr = [
        AdditionalValidationSets([(trigger_test[0], trigger_test[1],
                                   'watermark_new')]),
        ShowErrorsCallback(dataset=trigger, prefix="BB Trigger (Train)"),
        ShowErrorsCallback(dataset=trigger_test, prefix="BB Trigger (Test)")
    ]
    surr_model, history_surr = blackbox_attack(
        surrogate_model=surr_model_reg,
        epochs_surr=epochs_surr,
        train_data=attacker_data,
        trigger_set=trigger,
        test_data=test_data,
        batchsize=batchsize_surr,
        additional_callbacks=additional_callbacks_surr,
        cache_surr_model=cache_surr_model,
        verbose=False)

    return surr_model, (history_embed, reg_history, history_surr)

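# Hedged sketch of what the AdditionalValidationSets callback used above is
# assumed to do: evaluate the model on extra (x, y, name) sets after each epoch
# and record the results in its own history dict. It assumes the model is
# compiled with a single accuracy metric; the repository's real callback may
# track more (per-set losses, timing, etc.).
class _AdditionalValidationSetsSketch(keras.callbacks.Callback):
    def __init__(self, validation_sets):
        super().__init__()
        self.validation_sets = validation_sets  # list of (x, y, name) tuples
        self.history = {}

    def on_epoch_end(self, epoch, logs=None):
        for x_val, y_val, name in self.validation_sets:
            loss, acc = self.model.evaluate(x_val, y_val, verbose=0)
            self.history.setdefault(name + '_loss', []).append(loss)
            self.history.setdefault(name + '_acc', []).append(acc)
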
def countermark_blackbox(
        load_dataset_func,  # Function that loads the training and testing data
        model,  # Model specification for wm_embedding
        surrogate_model,  # Model specification for surrogate model training
        load_trigger_func,  # Function for loading the watermark set
        dataset_label="",  # Chosen label for the dataset (if caching is enabled)
        key_length=100,
        wm_boost_factor=100,
        owner_data_size=35000,
        total_owner_data_size=100000,
        attacker_data_size=15000,
        total_attacker_data_size=100000,
        epochs_embed=10,
        epochs_surr=20,
        batchsize_embed=64,
        batchsize_surr=64,
        cache_embed_wm=None,
        cache_surr_model=None,
        verbose=True):
    """ Generates a model that carries a COUNTERMARK watermark and a blackbox
    surrogate model that (hopefully) also carries the COUNTERMARK watermark
    """
    if verbose:
        print("[1/4] Fingerprint Blackbox Attack: Loading {} data".format(
            dataset_label))
        print(" Owner data: {} Attacker Data: {}".format(
            total_owner_data_size, total_attacker_data_size))

    cache_embed_wm, cache_surr_model = concat_labels_if_not_none(
        [cache_embed_wm, cache_surr_model], dataset_label)

    (all_x, all_y), test_data = load_dataset_func()

    if owner_data_size + attacker_data_size > len(all_x):
        raise RuntimeError(
            "Blackbox Attack data error! Trying to consume more training data than there is available!"
            " {}>{}".format(owner_data_size + attacker_data_size, len(all_x)))

    # Assure owner data and attacker data are mutually exclusive!
    owner_data, owner_data_from_cache = augment_data(
        set_to_augment=(all_x[:owner_data_size], all_y[:owner_data_size]),
        prefix=dataset_label,
        total_size=total_owner_data_size,
        use_cached_training_data="owner_data" + str(total_owner_data_size) +
        str(total_attacker_data_size),
        verbose=verbose)
    attacker_data, attacker_data_from_cache = augment_data(
        set_to_augment=(all_x[owner_data_size:owner_data_size + attacker_data_size],
                        all_y[owner_data_size:owner_data_size + attacker_data_size]),
        prefix=dataset_label,
        total_size=total_attacker_data_size,
        use_cached_training_data="attacker_data" + str(total_owner_data_size) +
        str(total_attacker_data_size),
        verbose=verbose)

    # Make sure to always regenerate both files if necessary
    if owner_data_from_cache != attacker_data_from_cache:
        raise RuntimeError(
            "Blackbox Attack data error! Sets are not mutually exclusive, please delete the conflicting "
            "file ending in '{}'!".format(
                str(total_owner_data_size) + str(total_attacker_data_size)))

    if verbose:
        print("[2/4] Training the network with {} keys each repeated {} times".format(
            key_length, wm_boost_factor))
    trigger = load_trigger_func()
    additional_callbacks = [
        ShowErrorsCallback(dataset=trigger, prefix="Embed Trigger")
    ]
    wm_model, history_embed, trigger = embed_wm(
        model=model,
        epochs=epochs_embed,
        train_data=owner_data,
        trigger_set=trigger,
        test_data=test_data,
        key_length=key_length,
        wm_boost_factor=wm_boost_factor,
        batchsize=batchsize_embed,
        additional_callbacks=additional_callbacks,
        cache_embed_wm=cache_embed_wm,
        verbose=False)

    if verbose:
        print(" Evaluating accuracy on attacker data...", end="", flush=True)
        acc_on_attacker_data = wm_model.evaluate(attacker_data[0],
                                                 attacker_data[1])
        print(" Done! Original discriminator's accuracy on the attacker's data: {}".format(
            acc_on_attacker_data[1]))
        print("[3/4] Labeling the attacker's data with the original model")
    pred_y = wm_model.predict(attacker_data[0])
    attacker_data = attacker_data[0], pred_y

    if verbose:
        print("[4/4] Training the surrogate model")
    additional_callbacks = [
        ShowErrorsCallback(dataset=trigger, prefix="BB Trigger")
    ]
    surr_model, history_surr = blackbox_attack(
        surrogate_model=surrogate_model,
        epochs_surr=epochs_surr,
        trigger_set=trigger,
        train_data=attacker_data,
        test_data=test_data,
        batchsize=batchsize_surr,
        additional_callbacks=additional_callbacks,
        cache_surr_model=cache_surr_model,
        verbose=False)

    return surr_model, (history_embed, history_surr)

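# Minimal sketch of what the wm_boost_factor argument to embed_wm is assumed to
# do: repeat the key set several times and shuffle it into the owner's training
# data so the watermark is seen often enough during normal training. This is an
# assumption about embed_wm's behaviour, shown only to explain the parameter;
# the function name and details are illustrative.
def _mix_trigger_sketch(train_data, trigger, wm_boost_factor):
    x, y = train_data
    keys, key_labels = trigger
    x_mix = np.vstack([x, np.repeat(keys, wm_boost_factor, axis=0)])
    y_mix = np.vstack([y, np.repeat(key_labels, wm_boost_factor, axis=0)])
    order = np.random.permutation(len(x_mix))  # shuffle keys into the training data
    return x_mix[order], y_mix[order]
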
def usenix_whitebox(
        load_dataset_func,  # Which dataset to choose. Should return training and testing data
        dataset_label,  # Label of the dataset (for caching)
        load_wm_model_func,  # Model specification for wm_embedding
        owner_data_size=35000,
        total_owner_data_size=100000,
        key_length=35,
        wm_boost_factor=1000,
        attacker_data_size=15000,
        attacker_data_size_reg=10000,
        total_attacker_data_size=15000,
        epochs_embed=10,
        epochs_reg=30,
        epochs_surr=10,
        early_stopping_wm_reg=0.1,  # At which watermark accuracy to stop the whitebox attack
        patience_reg=2,
        lr_surr=0.001,  # Learning rate for the surrogate model
        freeze_first_layers=0,  # How many layers to freeze for the surrogate model
        reg_whitebox=0.0,
        reg_surr=0.0,
        batchsize_embed=64,
        batchsize_reg=64,
        batchsize_surr=64,
        cache_embed_wm=None,
        cache_reg_model=None,
        cache_surr_model=None,
        verbose=True):
    """ Generates two mutually exclusive data sets for the owner and the attacker.
    Trains a watermarked model for the owner with the USENIX embedding. Then runs
    a regularization and a surrogate model attack with the attacker's data.
    """
    if verbose:
        print("[1/5] USENIX Whitebox Attack: Loading {} data".format(
            dataset_label))
        print(" Owner data: {} Attacker Data: {}".format(
            total_owner_data_size, total_attacker_data_size))

    cache_embed_wm, cache_reg_model, cache_surr_model = concat_labels_if_not_none(
        [cache_embed_wm, cache_reg_model, cache_surr_model], dataset_label)

    (all_x, all_y), test_data = load_dataset_func()

    if owner_data_size + attacker_data_size > len(all_x):
        raise RuntimeError(
            "Whitebox Attack data error! Trying to consume more training data than there is available!"
            " {}>{}".format(owner_data_size + attacker_data_size, len(all_x)))

    # Assure owner data and attacker data are mutually exclusive!
    owner_data, owner_data_from_cache = augment_data(
        set_to_augment=(all_x[:owner_data_size], all_y[:owner_data_size]),
        prefix=dataset_label,
        total_size=total_owner_data_size,
        use_cached_training_data="owner_data" + str(total_owner_data_size) +
        str(total_attacker_data_size),
        verbose=verbose)
    attacker_data, attacker_data_from_cache = augment_data(
        set_to_augment=(all_x[owner_data_size:owner_data_size + attacker_data_size],
                        all_y[owner_data_size:owner_data_size + attacker_data_size]),
        prefix=dataset_label,
        total_size=total_attacker_data_size,
        use_cached_training_data="attacker_data" + str(total_owner_data_size) +
        str(total_attacker_data_size),
        verbose=verbose)

    # Make sure to always regenerate both files if necessary
    if owner_data_from_cache != attacker_data_from_cache:
        raise RuntimeError(
            "Whitebox Attack data error! Sets are not mutually exclusive, please delete the conflicting "
            "file ending in '{}'!".format(
                str(total_owner_data_size) + str(total_attacker_data_size)))

    if verbose:
        print("[2/5] Training the network with {} keys each repeated {} times".format(
            key_length, wm_boost_factor))
    trigger = load_wm_images_usenix(imgsize=all_x[0].shape)
    additional_callbacks = [
        ShowErrorsCallback(dataset=trigger, prefix="Embed Trigger")
    ]
    wm_model, history_embed, trigger = embed_wm(
        model=load_wm_model_func(),
        epochs=epochs_embed,
        key_length=key_length,
        train_data=owner_data,
        trigger_set=trigger,
        test_data=test_data,
        wm_boost_factor=wm_boost_factor,
        batchsize=batchsize_embed,
        additional_callbacks=additional_callbacks,
        cache_embed_wm=cache_embed_wm,
        verbose=False)

    if verbose:
        print(" Evaluating accuracy on attacker data...", end="", flush=True)
        acc_on_attacker_data = wm_model.evaluate(attacker_data[0],
                                                 attacker_data[1])
        print(" Done! Original discriminator's accuracy on the attacker's data: {}".format(
            acc_on_attacker_data[1]))
        print("[3/5] Labeling the attacker's data with the original model")
    pred_y = wm_model.predict(attacker_data[0])
    attacker_data = attacker_data[0], pred_y
    attacker_data_reg = (attacker_data[0][0:attacker_data_size_reg],
                         attacker_data[1][0:attacker_data_size_reg])

    if verbose:
        print("[4/5] Removing the watermark with the regularization attack (freeze_first_layers={})".
              format(freeze_first_layers))
    additional_callbacks = [
        ShowErrorsCallback(dataset=trigger, prefix="WB Trigger")
    ]
    surr_model_reg, history_reg = whitebox_attack(
        wm_model=wm_model,
        load_model_func=load_wm_model_func,
        load_func_kwargs={"reg": reg_whitebox},
        load_func_kwargs2={
            "reg": reg_surr,
            "optimizer": RMSprop(lr=lr_surr),
            "freeze_first_layers": freeze_first_layers
        },
        trigger_set=trigger,
        train_data=attacker_data_reg,
        test_data=test_data,
        batchsize=batchsize_reg,
        epochs_reg=epochs_reg,
        early_stopping_wm=early_stopping_wm_reg,  # When to stop
        patience=patience_reg,
        additional_callbacks=additional_callbacks,
        cache_surr_model=cache_reg_model,
        verbose=False)

    if verbose:
        print("[5/5] Training the surrogate model")
    additional_callbacks = [
        ShowErrorsCallback(dataset=trigger, prefix="BB Trigger")
    ]
    surr_model, history_surr = blackbox_attack(
        surrogate_model=surr_model_reg,
        epochs_surr=epochs_surr,
        train_data=attacker_data,
        trigger_set=trigger,
        test_data=test_data,
        batchsize=batchsize_surr,
        additional_callbacks=additional_callbacks,
        cache_surr_model=cache_surr_model,
        verbose=False)

    return surr_model, (history_embed, history_reg, history_surr)
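# Hedged illustration of a USENIX-style trigger set, assuming the keys are
# abstract (random-noise) images paired with randomly chosen labels. The
# repository's load_wm_images_usenix presumably loads a fixed, pre-generated
# key set from disk instead; this sketch and its names are illustrative only.
def _usenix_trigger_sketch(imgsize, key_length=35, n_classes=10):
    keys = np.random.random_sample((key_length,) + tuple(imgsize))  # noise images in [0, 1)
    labels = keras.utils.to_categorical(
        np.random.randint(n_classes, size=key_length), n_classes)
    return keys, labels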