Пример #1
0
    def run(self):
        """Run the MINDFUL pipeline end to end and save the test metrics as CSV.

        Settings are read from ``self.ds`` (``pathModels``, ``pathPlot``) and
        from ``self.config`` (``VALIDATION_SPLIT``, ``N_CLASSES``,
        ``PREPROCESSING1``, ``LOAD_AUTOENCODER_NORMAL``,
        ``LOAD_AUTOENCODER_ADV``, ``LOAD_CNN``).

        Steps:
          1. Load the train/test sets; run the second preprocessing step when
             ``PREPROCESSING1`` is 1, otherwise load the numeric datasets.
          2. Train (flag == 0) or load from disk two autoencoders: one fitted
             on the training rows whose class column equals 1 ("normal"), the
             other on the rows whose class column equals 0 ("anormal").
          3. Pass every train/test sample through both autoencoders and combine
             the sample with both outputs via ``self.createImage``.
          4. Train (``LOAD_CNN`` == 0) or load the final classifier.
          5. Print the train and test confusion matrices and write the test
             metrics returned by ``getResult`` to
             ``ds._testpath + '_results.csv'``.
        """
        print('MINDFUL EXECUTION')

        dsConf = self.ds
        pathModels = dsConf.get('pathModels')
        pathPlot = dsConf.get('pathPlot')
        configuration = self.config

        VALIDATION_SPLIT = float(configuration.get('VALIDATION_SPLIT'))
        N_CLASSES = int(configuration.get('N_CLASSES'))
        pd.set_option('display.expand_frame_repr', False)

        # Datasets holds the dataset/model paths and the preprocessing phases.
        ds = Datasets(dsConf)
        ds.preprocessing1()
        train, test = ds.getTrain_Test()
        print(test)
        prp = prep(train, test)

        # Preprocessing phase from the original to the numerical dataset.
        if int(configuration.get('PREPROCESSING1')) == 1:
            train, test = ds.preprocessing2(prp)
        else:
            train, test = ds.getNumericDatasets()

        # Split both sets by the class column: 1 -> "normal", 0 -> "anormal".
        clsT, clsTest = prp.getCls()
        train_normal = train[(train[clsT] == 1)]
        train_anormal = train[(train[clsT] == 0)]
        test_normal = test[(test[clsTest] == 1)]
        test_anormal = test[(test[clsTest] == 0)]

        train_XN, train_YN, test_XN, test_YN = prp.getXY(
            train_normal, test_normal)
        train_XA, train_YA, test_XA, test_YA = prp.getXY(
            train_anormal, test_anormal)
        train_X, train_Y, test_X, test_Y = prp.getXY(train, test)

        print('Train data shape normal', train_XN.shape)
        print('Train target shape normal', train_YN.shape)
        print('Test data shape normal', test_XN.shape)
        print('Test target shape normal', test_YN.shape)

        print('Train data shape anormal', train_XA.shape)
        print('Train target shape anormal', train_YA.shape)
        print('Test data shape anormal', test_XA.shape)
        print('Test target shape anormal', test_YA.shape)

        # Convert class vectors to binary class matrices for the softmax output.
        train_Y2 = np_utils.to_categorical(train_Y, N_CLASSES)
        print("Target train shape after", train_Y2.shape)
        test_Y2 = np_utils.to_categorical(test_Y, N_CLASSES)
        print("Target test shape after", test_Y2.shape)
        print("Train all", train_X.shape)
        print("Test all", test_X.shape)

        # Column layout of the metrics row returned by getResult.
        columns = [
            'TP', 'FN', 'FP', 'TN', 'OA', 'AA', 'P', 'R', 'F1', 'FAR(FPR)',
            'TPR'
        ]

        # Early stopping shared by both autoencoders.
        callbacks_list = [
            callbacks.EarlyStopping(monitor='val_loss',
                                    min_delta=0.0001,
                                    patience=5,
                                    restore_best_weights=True),
        ]

        # --- Autoencoder fitted on the "normal" training rows ---
        if int(configuration.get('LOAD_AUTOENCODER_NORMAL')) == 0:
            autoencoderN, p = ds.getAutoencoder_Normal(train_XN, N_CLASSES)
            autoencoderN.summary()

            history = autoencoderN.fit(train_XN,
                                       train_XN,
                                       validation_split=VALIDATION_SPLIT,
                                       batch_size=p['batch_size'],
                                       epochs=p['epochs'],
                                       shuffle=True,
                                       callbacks=callbacks_list,
                                       verbose=1)
            autoencoderN.save(pathModels + 'autoencoderNormal.h5')
            Plot.printPlotLoss(history, 'autoencoderN', pathPlot)
        else:
            print("Load autoencoder Normal from disk")
            autoencoderN = load_model(pathModels + 'autoencoderNormal.h5')
            autoencoderN.summary()

        train_RE = autoencoderN.predict(train_X)
        test_RE = autoencoderN.predict(test_X)

        # --- Autoencoder fitted on the "anormal" training rows ---
        if int(configuration.get('LOAD_AUTOENCODER_ADV')) == 0:
            # The original passed `+train_XA`; the stray unary plus is dropped
            # so the call mirrors the one for the normal autoencoder.
            autoencoderA, p = ds.getAutoencoder_Attacks(train_XA, N_CLASSES)
            autoencoderA.summary()

            history = autoencoderA.fit(train_XA,
                                       train_XA,
                                       validation_split=VALIDATION_SPLIT,
                                       batch_size=p['batch_size'],
                                       epochs=p['epochs'],
                                       shuffle=True,
                                       callbacks=callbacks_list,
                                       verbose=1)
            autoencoderA.save(pathModels + 'autoencoderAttacks.h5')
            Plot.printPlotLoss(history, 'autoencoderA', pathPlot)
        else:
            print("Load autoencoder Attacks from disk")
            autoencoderA = load_model(pathModels + 'autoencoderAttacks.h5')
            autoencoderA.summary()

        train_REA = autoencoderA.predict(train_X)
        test_REA = autoencoderA.predict(test_X)

        # The classifier is fitted on the training images, so its input shape
        # is taken from them; the shape reported for the test images is unused.
        train_X_image, input_shape = self.createImage(train_X, train_RE,
                                                      train_REA)
        test_X_image, _ = self.createImage(test_X, test_RE, test_REA)

        # --- Final classifier ---
        if int(configuration.get('LOAD_CNN')) == 0:
            callbacks_list = [
                callbacks.EarlyStopping(monitor='val_loss',
                                        min_delta=0.0001,
                                        patience=20,
                                        restore_best_weights=True),
            ]

            model, p = ds.getMINDFUL(input_shape, N_CLASSES)

            history3 = model.fit(
                train_X_image,
                train_Y2,
                validation_split=VALIDATION_SPLIT,
                batch_size=p['batch_size'],
                epochs=p['epochs'],
                shuffle=True,  # shuffle=false for NSL-KDD true for UNSW-NB15
                callbacks=callbacks_list,
                verbose=1)

            Plot.printPlotAccuracy(history3, 'finalModel1', pathPlot)
            Plot.printPlotLoss(history3, 'finalModel1', pathPlot)
            model.save(pathModels + 'MINDFUL.h5')
        else:
            print("Load softmax from disk")
            model = load_model(pathModels + 'MINDFUL.h5')
            model.summary()

        # Confusion matrix on the training set.
        predictionsL = model.predict(train_X_image)
        y_pred = np.argmax(predictionsL, axis=1)
        cmC = confusion_matrix(train_Y, y_pred)
        print('Prediction Training')
        print(cmC)

        # Confusion matrix and metrics on the test set.
        predictionsL = model.predict(test_X_image)
        y_pred = np.argmax(predictionsL, axis=1)
        cm = confusion_matrix(test_Y, y_pred)
        print('Prediction Test')
        print(cm)

        r = getResult(cm, N_CLASSES)

        dfResults = pd.DataFrame([r], columns=columns)
        print(dfResults)

        # DataFrame.append was removed in pandas 2.0; only one row is ever
        # produced here, so it is written out directly.
        dfResults.to_csv(ds._testpath + '_results.csv', index=False)
Пример #2
0
def main():
    """Train an autoencoder plus a fixed-weight classifier on a CSV dataset.

    Interactively asks for the dataset folder, the dataset file name and the
    plot folder, then:
      1. reads the CSV and treats its last column as the classification target;
      2. preprocesses it with ``preprocessingDS`` and ``scaler``;
      3. splits it into train and test sets;
      4. fits a deep autoencoder and builds an encoder from it up to the layer
         named ``'encoder3'``;
      5. fits the classifier returned by ``Models.MLP_WeightFixed``;
      6. prints and plots the confusion matrix and accuracy on the test set.

    Side effects: writes ``autoencoder.png``, ``classifier.png`` and
    ``modelfixedW.h5`` to the working directory and the confusion-matrix figure
    into the plot folder.
    """
    pd.set_option('display.expand_frame_repr', False)
    pathFolder = input("Insert dataset path folder  (tips: dataset):")
    # This value is joined to the folder as a file name, so ask for a file.
    pathDataset = input("Insert dataset file name  (tips: KDD99.csv):")
    pathPlot = input("Insert plot path folder  (tips: plot):")
    df = pd.read_csv(os.path.join(pathFolder, pathDataset), delimiter=",")
    print("Dataset shape: ", df.shape)
    print("Dataset before preprocessing: ")
    print(df.head(5))

    # The number of classes is the number of distinct values in the last column.
    distinctLabels = df[df.columns[-1]].unique().tolist()
    N_CLASSES = len(distinctLabels)

    print("Start preprocessing step")
    # Numeric columns are collected before preprocessing and later passed to
    # the standard scaler.
    numericColumn = df.select_dtypes(include=[np.number]).columns.tolist()
    classificationCol = df.columns[-1]  # name of target column
    print(classificationCol)

    # Preprocessing: map the target from categorical to numeric and one-hot
    # encode the categorical columns.
    df = preprocessingDS(df)
    print("Dataset after one-hot encoding:")
    print(df.head(5))

    # Preprocessing: standard scaler.
    df = scaler(df, numericColumn)

    # Split into training and testing sets.
    sizesplit = split_dataset()
    pl = Plot(pathPlot)

    train, test = train_test_split(df, test_size=sizesplit)

    print("Train shape after split: ", train.shape)
    print("Test shape after split: ", test.shape)
    pl.plotStatistics(train, test, classificationCol)

    train_X, train_Y, test_X, test_Y = getXY(train, test, classificationCol)
    # Convert class vectors to binary class matrices.
    train_Y2 = np_utils.to_categorical(train_Y, N_CLASSES)

    callbacks_list = [
        callbacks.EarlyStopping(monitor='val_loss',
                                patience=4,
                                restore_best_weights=True)
    ]

    m = Models(N_CLASSES)

    VALIDATION_SPLIT = 0.1
    print('Model with autoencoder+softmax with fixed encoder weights')
    # Parameters for the autoencoder.
    p1 = {
        'first_layer': 60,
        'second_layer': 30,
        'third_layer': 10,
        'batch_size': 64,
        'epochs': 150,
        'optimizer': optimizers.Adam,
        'kernel_initializer': 'glorot_uniform',
        'losses': 'mse',
        'first_activation': 'tanh',
        'second_activation': 'tanh',
        'third_activation': 'tanh'
    }

    autoencoder = m.deepAutoEncoder(train_X, p1)
    autoencoder.summary()

    # Encoder used for feature extraction: autoencoder input -> 'encoder3'.
    encoder = Model(inputs=autoencoder.input,
                    outputs=autoencoder.get_layer('encoder3').output)
    encoder.summary()

    history2 = autoencoder.fit(train_X,
                               train_X,
                               validation_split=VALIDATION_SPLIT,
                               batch_size=p1['batch_size'],
                               epochs=p1['epochs'],
                               shuffle=False,
                               callbacks=callbacks_list,
                               verbose=1)

    pl.printPlotLoss(history2, 'autoencoder')
    plot_model(autoencoder,
               to_file='autoencoder.png',
               show_shapes=True,
               show_layer_names=True)

    # Parameters for the final model.
    p2 = {
        'batch_size': 256,
        'epochs': 100,
        'optimizer': optimizers.Adam,
        'kernel_initializer': 'glorot_uniform',
        'losses': 'binary_crossentropy',
        'first_activation': 'tanh',
        'second_activation': 'tanh',
        'third_activation': 'relu'
    }

    model = m.MLP_WeightFixed(encoder, train_X, p2)

    # Train with the final model's own batch size / epochs (p2); the original
    # reused the autoencoder's p1 values here.
    history3 = model.fit(train_X,
                         train_Y2,
                         validation_split=VALIDATION_SPLIT,
                         batch_size=p2['batch_size'],
                         epochs=p2['epochs'],
                         shuffle=False,
                         callbacks=callbacks_list,
                         verbose=1)

    # Both plots must use the classifier's history (history3), not the
    # autoencoder's.
    pl.printPlotAccuracy(history3, 'finalModel1')
    pl.printPlotLoss(history3, 'finalModel1')
    model.save('modelfixedW.h5')
    plot_model(model,
               to_file='classifier.png',
               show_shapes=True,
               show_layer_names=True)

    predictions = model.predict(test_X)

    # Predicted class = index of the highest output per sample.
    y_pred = np.argmax(predictions, axis=1)
    cm = confusion_matrix(test_Y, y_pred)
    acc = accuracy_score(test_Y, y_pred, normalize=True)
    # NOTE(review): assumes a binary target where class 0 is "Attacks" and
    # class 1 is "Normal" - confirm against preprocessingDS.
    LABELS = ["Attacks", "Normal"]
    print("Confusion matrix on test set")
    print(cm)
    print("Accuracy model on test set: " + str(acc))
    plt.figure(figsize=(12, 12))
    sns.heatmap(cm,
                xticklabels=LABELS,
                yticklabels=LABELS,
                annot=True,
                fmt="d")
    plt.title("Confusion matrix on test set")
    plt.ylabel('True class')
    plt.xlabel('Predicted class')
    plt.savefig(os.path.join(pathPlot, "confusion matrix"))
    plt.show()
    plt.close()
Пример #3
0
    def run(self):
        """Run the MINDFUL pipeline with several test sets and save mean metrics.

        Settings are read from ``self.ds`` (``pathModels``, ``pathPlot``) and
        from ``self.config`` (``VALIDATION_SPLIT``, ``N_CLASSES``,
        ``PREPROCESSING1``, ``LOAD_AUTOENCODER_NORMAL``,
        ``LOAD_AUTOENCODER_ADV``, ``LOAD_CNN``).

        Here ``test_X`` / ``test_Y`` returned by ``prp.getXYCICIDS`` are
        iterated as collections of test sets. Steps:
          1. Load and preprocess the data.
          2. Train (flag == 0) or load two autoencoders: one fitted on training
             rows whose class column equals 1 ("normal"), the other on rows
             whose class column equals 0 ("anormal").
          3. Pass the training set and every test set through both autoencoders
             and combine the results via ``self.createImage``.
          4. Train (``LOAD_CNN`` == 0) or load the final classifier, using a
             stratified 80/20 train/validation split.
          5. Evaluate on every test set, average the metrics returned by
             ``getResult`` across test sets, and write that mean row to
             ``ds._testpath + '_results.csv'``.
        """
        print('MINDFUL EXECUTION')

        dsConf = self.ds
        pathModels = dsConf.get('pathModels')
        pathPlot = dsConf.get('pathPlot')
        configuration = self.config

        VALIDATION_SPLIT = float(configuration.get('VALIDATION_SPLIT'))
        N_CLASSES = int(configuration.get('N_CLASSES'))
        pd.set_option('display.expand_frame_repr', False)

        # Datasets holds the dataset/model paths and the preprocessing phases.
        ds = Datasets(dsConf)
        ds.preprocessing1()
        train, test = ds.getTrain_TestCIDIS()
        prp = prep(train, test)

        # Preprocessing phase from the original to the numerical dataset.
        if int(configuration.get('PREPROCESSING1')) == 1:
            train, test = ds.preprocessing2(prp)
        else:
            train, test = ds.getNumericDatasets()

        # Split the training set by the class column: 1 -> "normal",
        # 0 -> "anormal".
        clsT, clsTest = prp.getCls()
        train_normal = train[(train[clsT] == 1)]
        train_anormal = train[(train[clsT] == 0)]

        train_XN, train_YN = prp.getXYTrain(train_normal)
        train_XA, train_YA = prp.getXYTrain(train_anormal)
        train_X, train_Y, test_X, test_Y = prp.getXYCICIDS(train, test)

        print('Train data shape normal', train_XN.shape)
        print('Train target shape normal', train_YN.shape)
        print('Train data shape anormal', train_XA.shape)
        print('Train target shape anormal', train_YA.shape)

        # Convert class vectors to binary class matrices for the softmax output.
        train_Y2 = np_utils.to_categorical(train_Y, N_CLASSES)
        print("Target train shape after", train_Y2.shape)
        print("Train all", train_X.shape)

        # Column layout of the metrics row returned by getResult.
        columns = ['TP', 'FN', 'FP', 'TN', 'OA', 'AA', 'P', 'R', 'F1',
                   'FAR(FPR)', 'TPR']

        # Early stopping shared by both autoencoders.
        callbacks_list = [
            callbacks.EarlyStopping(monitor='val_loss', min_delta=0.0001,
                                    patience=20, restore_best_weights=True),
        ]

        # --- Autoencoder fitted on the "normal" training rows ---
        if int(configuration.get('LOAD_AUTOENCODER_NORMAL')) == 0:
            autoencoderN, p = ds.getAutoencoder_Normal(train_XN, N_CLASSES)

            # The encoder sub-model is built only to print its summary.
            encoderN = Model(inputs=autoencoderN.input,
                             outputs=autoencoderN.get_layer('encoder3').output)
            encoderN.summary()

            history = autoencoderN.fit(train_XN, train_XN,
                                       validation_split=VALIDATION_SPLIT,
                                       batch_size=p['batch_size'],
                                       epochs=p['epochs'], shuffle=True,
                                       callbacks=callbacks_list,
                                       verbose=1)
            autoencoderN.save(pathModels + 'autoencoderNormal.h5')
            Plot.printPlotLoss(history, 'autoencoderN', pathPlot)
        else:
            print("Load autoencoder Normal from disk")
            autoencoderN = load_model(pathModels + 'autoencoderNormal.h5')
            autoencoderN.summary()

        train_RE = autoencoderN.predict(train_X)
        # One output array per test set.
        test_RE = [autoencoderN.predict(t) for t in test_X]

        # --- Autoencoder fitted on the "anormal" training rows ---
        if int(configuration.get('LOAD_AUTOENCODER_ADV')) == 0:
            # The original passed `+train_XA`; the stray unary plus is dropped
            # so the call mirrors the one for the normal autoencoder.
            autoencoderA, p = ds.getAutoencoder_Attacks(train_XA, N_CLASSES)

            # The encoder sub-model is built only to print its summary.
            encoderA = Model(inputs=autoencoderA.input,
                             outputs=autoencoderA.get_layer('encoder3').output)
            encoderA.summary()

            history = autoencoderA.fit(train_XA, train_XA,
                                       validation_split=VALIDATION_SPLIT,
                                       batch_size=p['batch_size'],
                                       epochs=p['epochs'], shuffle=True,
                                       callbacks=callbacks_list,
                                       verbose=1)
            autoencoderA.save(pathModels + 'autoencoderAttacks.h5')
            Plot.printPlotLoss(history, 'autoencoderA', pathPlot)
        else:
            print("Load autoencoder Attacks from disk")
            autoencoderA = load_model(pathModels + 'autoencoderAttacks.h5')
            autoencoderA.summary()

        train_REA = autoencoderA.predict(train_X)
        # One output array per test set.
        test_REA = [autoencoderA.predict(t) for t in test_X]

        # The classifier is fitted on the training images, so its input shape
        # is taken from them. The original only bound `input_shape` inside the
        # test loop, which raised NameError when there were no test sets.
        train_X_image, input_shape = self.createImage(train_X, train_RE,
                                                      train_REA)
        test_X_image = [
            self.createImage(t, tN, tA)[0]
            for t, tN, tA in zip(test_X, test_RE, test_REA)
        ]

        # --- Final classifier ---
        if int(configuration.get('LOAD_CNN')) == 0:
            callbacks_list = [
                callbacks.EarlyStopping(monitor='val_loss', min_delta=0.0001,
                                        patience=10,
                                        restore_best_weights=True),
            ]

            model, p = ds.getMINDFUL(input_shape, N_CLASSES)
            # Stratified hold-out split used as validation data during fitting.
            XTraining, XValidation, YTraining, YValidation = train_test_split(
                train_X_image, train_Y2, stratify=train_Y2, test_size=0.2)

            history3 = model.fit(XTraining, YTraining,
                                 validation_data=(XValidation, YValidation),
                                 batch_size=p['batch_size'],
                                 epochs=p['epochs'], shuffle=True,
                                 callbacks=callbacks_list,
                                 verbose=1)

            Plot.printPlotAccuracy(history3, 'finalModel1', pathPlot)
            Plot.printPlotLoss(history3, 'finalModel1', pathPlot)
            model.save(pathModels + 'MINDFUL.h5')
        else:
            print("Load softmax from disk")
            model = load_model(pathModels + 'MINDFUL.h5')
            model.summary()

        # Confusion matrix on the training set.
        predictionsL = model.predict(train_X_image)
        y_pred = np.argmax(predictionsL, axis=1)
        cmC = confusion_matrix(train_Y, y_pred)
        print('Prediction Training')
        print(cmC)

        # Evaluate every test set and collect one metrics row per set.
        r_list = []
        for t, Y in zip(test_X_image, test_Y):
            predictionsC = model.predict(t)
            print('Softmax on test set')
            y_pred = np.argmax(predictionsC, axis=1)
            cm = confusion_matrix(Y, y_pred)
            print(cm)
            r = getResult(cm, N_CLASSES)
            r_list.append(tuple(r))

        # Average the metrics over all test sets. The original computed this
        # mean but then reported only the last test set's row (`r`).
        dfResults_temp = pd.DataFrame(r_list, columns=columns)
        drMean = dfResults_temp.mean(axis=0)
        r_mean = list(pd.Series(drMean).values)

        dfResults = pd.DataFrame([r_mean], columns=columns)
        print(dfResults)

        # DataFrame.append was removed in pandas 2.0; only one row is ever
        # produced here, so it is written out directly.
        dfResults.to_csv(ds._testpath + '_results.csv', index=False)