Example #1
    def train(self,
              epochs=5,
              c_list=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
              data_set_list=["Cifar", "Mnist"]):
        """
        This method trains several models and saves their weights. This avoids instantiating
        several models simultaneously, which leads to OOM errors. It saves
        len(c_list)*len(data_set_list) models.
        inputs:
        -epochs (int > 0): number of training epochs
        -c_list (list of floats between 0 and 1): the c parameters for the objective
            function; 0 = no adv training, 1 = adv training only
        -data_set_list (list of str): names of the data sets on which to train the model
        """
        cfg = get_cfg()

        for data_set_name, c_param in itertools.product(data_set_list, c_list):
            save_path = join(cfg.MODELS_PATH, "adversarial_training",
                             str(data_set_name), f"c={c_param}.h5")

            (X_train, X_test, y_train, y_test) = pick_data_set(data_set_name)
            train_data_set = (tf.data.Dataset.from_tensor_slices(
                (X_train, y_train)).shuffle(buffer_size=100000).batch(128))

            print(
                f"\n======= Data set: {data_set_name}, c: {c_param} ======="
            )
            self.c_param = c_param

            if not exists(save_path):
                print(f"Dataset not found at {save_path}; training new model")
                acc = compute_acc(self.model, X_test[:1000], y_test[:1000])

                print(f"Accuracy before training is {acc}\n--------------")
                for epoch in range(epochs):
                    start = time.time()
                    for (data, label) in tqdm(train_data_set, position=0):
                        data = tf.cast(data, dtype="float32")
                        self.train_step(data, label)

                    acc = compute_acc(self.model, X_test[:1000], y_test[:1000])

                    print(
                        f"Time for epoch {epoch + 1} is {time.time() - start} sec"
                    )
                    print("-----------")
                    print(f"Accuracy for epoch {epoch + 1} is {acc} ")

                self.model.save_weights(save_path)

            else:
                print(f"Dataset found at {save_path}; using pretrained model")
                self.model.load_weights(save_path)
                acc = compute_acc(self.model, X_test[:1000], y_test[:1000])

                print("-----------")
                print(f"Accuracy is {acc}")

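A minimal usage sketch for this method. `AdversarialTrainer` is a hypothetical name for the class that owns train(); per the docstring, c presumably weights the adversarial term of the objective (roughly (1 - c) * clean loss + c * adversarial loss), and the real class must provide self.model and self.train_step as the loop above assumes.

# Hypothetical usage sketch: `AdversarialTrainer` stands in for whatever class
# defines train() above; it must expose self.model and self.train_step.
trainer = AdversarialTrainer()
trainer.train(
    epochs=5,
    c_list=[0.1, 0.5],        # mix of clean and adversarial loss per model
    data_set_list=["Cifar"],  # train on CIFAR only
)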
Example #2
def train_and_save_effnet(data_set_name):
    """
    This fonction train (or load) and save an instance of EfficientNet
    -name : 'Cifar' or 'Mnist'
    output:
    -model_effnet (tensorflow model): trained instance of EfficientNet

    """
    (X_train, _, y_train, _) = pick_data_set(data_set_name)
    tf.keras.backend.clear_session()
    effnet_base = EfficientNetB7(
        weights="imagenet", include_top=False, input_shape=(32, 32, 3)
    )
    effnet_base.trainable = True
    layer = GlobalMaxPooling2D(name="pool_1")(effnet_base.layers[-2].output)
    layer = Dropout(0.2, name="dropout_2")(layer)
    layer = Dense(32)(layer)
    layer = Dense(10, name="fc_2")(layer)
    output = Activation("softmax", name="act_2")(layer)
    model_effnet = Model(inputs=effnet_base.input, outputs=[output])

    cfg = get_cfg()
    model_path = cfg.MODELS_PATH + "effnet_model_" + str(data_set_name) + ".h5"
    if not exists(model_path):
        model_effnet.compile(
            loss="categorical_crossentropy", optimizer="nadam", metrics=["accuracy"]
        )
        _ = model_effnet.fit(
            X_train,
            y_train,
            epochs=5,
            batch_size=128,
            validation_split=0.1,
            shuffle=True,
            verbose=1,
        )
        model_effnet.save(model_path)

    else:
        model_effnet = load_model(model_path)

    return model_effnet
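Note that EfficientNetB7 is built with include_top=False, which is what lets it accept the 32x32 inputs here instead of its native resolution. A hedged usage sketch:

# Usage sketch: train (or load) the EfficientNet classifier on CIFAR and
# sanity-check the output shape on a small batch.
model_effnet = train_and_save_effnet("Cifar")
(X_train, _, _, _) = pick_data_set("Cifar")
preds = model_effnet.predict(X_train[:8])
print(preds.shape)  # expected (8, 10): one softmax distribution per image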
Example #3
def train_and_save_small_model(data_set_name):
    """
    This fonction train (or load) and save an instance of a small custom CNN
    -name : 'Cifar' or 'Mnist'
    output:
    -small model (tensorflow model): trained instance of the small model

    """
    cfg = get_cfg()
    model_path = cfg.MODELS_PATH + "small_model_" + str(data_set_name) + ".h5"
    if not exists(model_path):
        (X_train, _, y_train, _) = pick_data_set(data_set_name)
        tf.keras.backend.clear_session()
        small_model = tf.keras.models.Sequential()
        small_model.add(Conv2D(64, (3, 3), activation="relu", input_shape=(32, 32, 3)))
        small_model.add(MaxPooling2D(2, 2))
        small_model.add(Conv2D(64, (3, 3), activation="relu"))
        small_model.add(MaxPooling2D(2, 2))
        small_model.add(Flatten())
        small_model.add(Dense(128, activation="relu"))
        small_model.add(Dense(10, activation="softmax"))

        small_model.compile(
            loss="categorical_crossentropy", optimizer="nadam", metrics=["accuracy"]
        )

        _ = small_model.fit(
            X_train,
            y_train,
            epochs=10,
            batch_size=128,
            validation_split=0.1,
            shuffle=True,
            verbose=1,
        )

        small_model.save(model_path)
    else:
        small_model = load_model(model_path)
    return small_model
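And the corresponding usage for the small baseline, e.g. to compare its capacity against the EfficientNet model:

# Usage sketch: build or load the small CNN baseline and inspect its layers.
small_model = train_and_save_small_model("Mnist")
small_model.summary()  # two Conv2D/MaxPooling2D blocks -> Flatten -> Dense(128) -> Dense(10)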
Example #4
def create_model_with_defense(data_set_name):
    """
    input:
    -data_set_name (str): name of the data set on which to train the model ('Mnist' or 'Cifar')
    output:
    -model_with_def (tensorflow model): a trained model wrapped with the ResizePad defense
    """

    (X_train, _, y_train, _) = pick_data_set(data_set_name)
    model_without_def = train_and_save_effnet(data_set_name)

    cfg = get_cfg()
    model_path = cfg.MODELS_PATH + "/random_padding/" + str(
        data_set_name) + ".h5"

    model_with_def = tf.keras.models.Sequential()
    model_with_def.add(Input(shape=(32, 32, 3)))
    model_with_def.add(ResizePad())
    model_with_def.add(model_without_def)

    model_with_def.compile(loss="categorical_crossentropy",
                           optimizer="nadam",
                           metrics=["accuracy"])
    if not exists(model_path):
        _ = model_with_def.fit(
            X_train,
            y_train,
            epochs=5,
            batch_size=32,
            validation_split=0.1,
            shuffle=True,
            verbose=1,
        )
        model_with_def.save_weights(model_path)
    else:
        model_with_def.load_weights(model_path)

    return model_with_def
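ResizePad is not defined in this snippet. It presumably implements the random resize-and-pad input transformation of Xie et al. (2018); a minimal sketch of such a Keras layer, under that assumption (the layer name is taken from the code above, but its parameters and internals here are illustrative):

import tensorflow as tf

class ResizePad(tf.keras.layers.Layer):
    """Hypothetical sketch of a random resize-and-pad defense layer."""

    def __init__(self, out_size=32, min_size=24, **kwargs):
        super().__init__(**kwargs)
        self.out_size = out_size
        self.min_size = min_size

    def call(self, inputs):
        # Shrink the batch to a random intermediate size in [min_size, out_size].
        new_size = tf.random.uniform(
            [], self.min_size, self.out_size + 1, dtype=tf.int32)
        resized = tf.image.resize(inputs, [new_size, new_size])
        # Zero-pad back to out_size x out_size at a random top-left offset,
        # so an attacker cannot rely on a fixed pixel grid.
        pad = self.out_size - new_size
        top = tf.random.uniform([], 0, pad + 1, dtype=tf.int32)
        left = tf.random.uniform([], 0, pad + 1, dtype=tf.int32)
        return tf.image.pad_to_bounding_box(
            resized, top, left, self.out_size, self.out_size)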
Example #5
def make_adv_data_set(data_set_name,
                      model_effnet,
                      number_of_image_to_use=6000,
                      attack=LinfFastGradientAttack()):
    """
    This function creates the data set that will be used to train the autoencoder (same than
    previous function)? but it also add some couples (benign image, bening image) so that the model
    'understands' thaT not all images are adversarial. It also shuffles it.

    inputs:
    -data_set_name: 'Cifar' or 'Mnist'
    -model_effnet (tensorflow model): model that will be attacked to produced the adv examples
    -number_of_image_to_use (int): number of images used to produced adv examples (the resulting
        number of adv images will be less than that)
    -attack (foolbox attack): attack used to produce the adversarial example

    outputs:
    -adv_list (list of numpy arrays): adversarial images
    -benign_list (list of numpy arrays): benign images corresponding to the images in adv_list
    -adv_true_label: true labels of the images (attention here the are not one hot encoded)


    """
    (_, X_test, _, y_test) = pick_data_set(data_set_name)
    cfg = get_cfg()

    path = cfg.DATA_PATH + "adv images and benign images " + str(data_set_name)

    if not exists(path):
        (adv_list, benign_list, adv_true_label) = data_set_maker(
            model_effnet,
            attack,
            X_test[:number_of_image_to_use],
            y_test[:number_of_image_to_use],
        )
        with open(path, "wb") as f:  # pylint: disable=invalid-name
            pickle.Pickler(f).dump(adv_list)
            pickle.Pickler(f).dump(benign_list)
            pickle.Pickler(f).dump(adv_true_label)
    else:
        with open(path, "rb") as f:  # pylint: disable=invalid-name
            adv_list = pickle.Unpickler(f).load()
            benign_list = pickle.Unpickler(f).load()
            adv_true_label = pickle.Unpickler(f).load()

    # let us add some benign examples to the data set and shuffle the result
    adv_list.extend(X_test[number_of_image_to_use:number_of_image_to_use +
                           1000])
    benign_list.extend(X_test[number_of_image_to_use:number_of_image_to_use +
                              1000])
    adv_true_label.extend(
        list(
            map(
                np.argmax,
                y_test[number_of_image_to_use:number_of_image_to_use + 1000],
            )))

    adv_list = np.array(adv_list)
    benign_list = np.array(benign_list)
    adv_true_label = np.array(adv_true_label)

    # shuffle the three arrays with a single shared permutation
    indices = np.arange(len(adv_list))
    np.random.shuffle(indices)
    adv_list = adv_list[indices]
    benign_list = benign_list[indices]
    adv_true_label = adv_true_label[indices]
    return (adv_list, benign_list, adv_true_label)
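A hedged end-to-end usage sketch, combining the helpers above:

# Usage sketch: build the (adversarial, benign, label) triples for CIFAR using
# the default FGSM attack from the signature above.
model_effnet = train_and_save_effnet("Cifar")
adv_list, benign_list, adv_true_label = make_adv_data_set(
    "Cifar", model_effnet, number_of_image_to_use=6000)
print(adv_list.shape, benign_list.shape)
print(adv_true_label[:10])  # integer class indices, not one-hot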