def run(self):
    """Run the MINDFUL pipeline on a single train/test pair.

    Steps, as driven by ``self.ds`` (paths) and ``self.config`` (flags):

    1. Load and preprocess the datasets through ``Datasets`` / ``prep``.
    2. Train (or load from ``pathModels``) one autoencoder on the rows whose
       class column equals 1 ("normal") and one on the rows whose class
       column equals 0 ("anormal").
    3. Run both autoencoders over the full train and test sets and combine
       the originals with the two reconstructions via ``self.createImage``.
    4. Train (or load) the final classifier on those images.
    5. Print the train/test confusion matrices and write the test metrics
       to ``ds._testpath + '_results.csv'``.

    Configuration keys read: ``VALIDATION_SPLIT``, ``N_CLASSES``,
    ``PREPROCESSING1``, ``LOAD_AUTOENCODER_NORMAL``, ``LOAD_AUTOENCODER_ADV``
    and ``LOAD_CNN``. A ``LOAD_*`` value of 0 means "train and save";
    any other value means "load the saved model from disk".
    """
    print('MINDFUL EXECUTION')

    dsConf = self.ds
    pathModels = dsConf.get('pathModels')
    pathPlot = dsConf.get('pathPlot')
    configuration = self.config

    VALIDATION_SPLIT = float(configuration.get('VALIDATION_SPLIT'))
    N_CLASSES = int(configuration.get('N_CLASSES'))
    pd.set_option('display.expand_frame_repr', False)

    # Datasets holds the dataset/model paths and the preprocessing phases.
    ds = Datasets(dsConf)
    ds.preprocessing1()
    train, test = ds.getTrain_Test()
    print(test)
    prp = prep(train, test)

    # Preprocessing phase from original to numerical dataset; when disabled
    # the already-numeric datasets are fetched instead.
    PREPROCESSING1 = int(configuration.get('PREPROCESSING1'))
    if PREPROCESSING1 == 1:
        train, test = ds.preprocessing2(prp)
    else:
        train, test = ds.getNumericDatasets()

    # Split by class label: 1 is treated as "normal", 0 as "anormal".
    clsT, clsTest = prp.getCls()
    train_normal = train[train[clsT] == 1]
    train_anormal = train[train[clsT] == 0]
    test_normal = test[test[clsTest] == 1]
    test_anormal = test[test[clsTest] == 0]

    train_XN, train_YN, test_XN, test_YN = prp.getXY(train_normal, test_normal)
    train_XA, train_YA, test_XA, test_YA = prp.getXY(train_anormal, test_anormal)
    train_X, train_Y, test_X, test_Y = prp.getXY(train, test)

    print('Train data shape normal', train_XN.shape)
    print('Train target shape normal', train_YN.shape)
    print('Test data shape normal', test_XN.shape)
    print('Test target shape normal', test_YN.shape)

    print('Train data shape anormal', train_XA.shape)
    print('Train target shape anormal', train_YA.shape)
    print('Test data shape anormal', test_XA.shape)
    print('Test target shape anormal', test_YA.shape)

    # Convert class vectors to binary class matrices for the softmax output.
    train_Y2 = np_utils.to_categorical(train_Y, N_CLASSES)
    print("Target train shape after", train_Y2.shape)
    test_Y2 = np_utils.to_categorical(test_Y, N_CLASSES)
    print("Target test shape after", test_Y2.shape)
    print("Train all", train_X.shape)
    print("Test all", test_X.shape)

    # Result table layout; getResult() is expected to return one value per
    # column, in this order.
    columns = [
        'TP', 'FN', 'FP', 'TN', 'OA', 'AA', 'P', 'R', 'F1',
        'FAR(FPR)', 'TPR',
    ]
    results = pd.DataFrame(columns=columns)

    callbacks_list = [
        callbacks.EarlyStopping(monitor='val_loss',
                                min_delta=0.0001,
                                patience=5,
                                restore_best_weights=True),
    ]

    # --- Autoencoder trained on "normal" samples only -------------------
    if int(configuration.get('LOAD_AUTOENCODER_NORMAL')) == 0:
        autoencoderN, p = ds.getAutoencoder_Normal(train_XN, N_CLASSES)
        autoencoderN.summary()

        history = autoencoderN.fit(train_XN, train_XN,
                                   validation_split=VALIDATION_SPLIT,
                                   batch_size=p['batch_size'],
                                   epochs=p['epochs'],
                                   shuffle=True,
                                   callbacks=callbacks_list,
                                   verbose=1)
        autoencoderN.save(pathModels + 'autoencoderNormal.h5')
        Plot.printPlotLoss(history, 'autoencoderN', pathPlot)
    else:
        print("Load autoencoder Normal from disk")
        autoencoderN = load_model(pathModels + 'autoencoderNormal.h5')
        autoencoderN.summary()

    # Reconstructions of ALL samples by the "normal" autoencoder.
    train_RE = autoencoderN.predict(train_X)
    test_RE = autoencoderN.predict(test_X)

    # --- Autoencoder trained on "anormal" (attack) samples only ---------
    if int(configuration.get('LOAD_AUTOENCODER_ADV')) == 0:
        autoencoderA, p = ds.getAutoencoder_Attacks(train_XA, N_CLASSES)
        autoencoderA.summary()

        history = autoencoderA.fit(train_XA, train_XA,
                                   validation_split=VALIDATION_SPLIT,
                                   batch_size=p['batch_size'],
                                   epochs=p['epochs'],
                                   shuffle=True,
                                   callbacks=callbacks_list,
                                   verbose=1)
        autoencoderA.save(pathModels + 'autoencoderAttacks.h5')
        Plot.printPlotLoss(history, 'autoencoderA', pathPlot)
    else:
        print("Load autoencoder Attacks from disk")
        autoencoderA = load_model(pathModels + 'autoencoderAttacks.h5')
        autoencoderA.summary()

    # Reconstructions of ALL samples by the "attack" autoencoder.
    train_REA = autoencoderA.predict(train_X)
    test_REA = autoencoderA.predict(test_X)

    # Combine original samples with both reconstructions into the classifier
    # input. Only the shape returned for the test images is used below, as in
    # the original code.
    train_X_image, _ = self.createImage(train_X, train_RE, train_REA)
    test_X_image, input_shape = self.createImage(test_X, test_RE, test_REA)

    # --- Final classifier -----------------------------------------------
    if int(configuration.get('LOAD_CNN')) == 0:
        # The classifier gets a more patient early-stopping policy than the
        # autoencoders.
        callbacks_list = [
            callbacks.EarlyStopping(monitor='val_loss',
                                    min_delta=0.0001,
                                    patience=20,
                                    restore_best_weights=True),
        ]

        model, p = ds.getMINDFUL(input_shape, N_CLASSES)

        history3 = model.fit(
            train_X_image, train_Y2,
            validation_split=VALIDATION_SPLIT,
            batch_size=p['batch_size'],
            epochs=p['epochs'],
            shuffle=True,  # shuffle=false for NSL-KDD true for UNSW-NB15
            callbacks=callbacks_list,
            verbose=1)

        Plot.printPlotAccuracy(history3, 'finalModel1', pathPlot)
        Plot.printPlotLoss(history3, 'finalModel1', pathPlot)
        model.save(pathModels + 'MINDFUL.h5')
    else:
        print("Load softmax from disk")
        model = load_model(pathModels + 'MINDFUL.h5')
        model.summary()

    # --- Evaluation -------------------------------------------------------
    predictionsL = model.predict(train_X_image)
    y_pred = np.argmax(predictionsL, axis=1)
    cmC = confusion_matrix(train_Y, y_pred)
    print('Prediction Training')
    print(cmC)

    predictionsL = model.predict(test_X_image)
    y_pred = np.argmax(predictionsL, axis=1)
    cm = confusion_matrix(test_Y, y_pred)
    print('Prediction Test')
    print(cm)

    r = getResult(cm, N_CLASSES)

    dfResults = pd.DataFrame([r], columns=columns)
    print(dfResults)

    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
    # replacement and yields the same table.
    results = pd.concat([results, dfResults], ignore_index=True)
    results.to_csv(ds._testpath + '_results.csv', index=False)
def main():
    """Interactive script: autoencoder feature extractor + classifier.

    Prompts for the dataset folder, the dataset file name and the plot
    folder, then:

    1. Loads the CSV; the last column is used as the classification target.
    2. Preprocesses it (``preprocessingDS``) and scales the columns that were
       numeric before preprocessing (``scaler``).
    3. Splits into train/test using the size returned by ``split_dataset``.
    4. Trains a deep autoencoder and exposes the output of its ``encoder3``
       layer as an encoder model.
    5. Trains the classifier built by ``Models.MLP_WeightFixed`` on top of
       that encoder.
    6. Saves models and plots, prints the confusion matrix and accuracy on
       the test set, and shows/saves a confusion-matrix heatmap.
    """
    pd.set_option('display.expand_frame_repr', False)

    pathFolder = input("Insert dataset path folder (tips: dataset):")
    # The second answer is joined onto the folder below, so it is the file
    # name; the original prompt asked for the "path folder" a second time.
    pathDataset = input("Insert dataset file name (tips: KDD99.csv):")
    pathPlot = input("Insert plot path folder (tips: plot):")

    df = pd.read_csv(os.path.join(pathFolder, pathDataset), delimiter=",")
    print("Dataset shape: ", df.shape)
    print("Dataset before preprocessing: ")
    print(df.head(5))

    # The target is the last column; the number of classes is the number of
    # distinct values it takes.
    classificationCol = df.columns[-1]
    distinctLabels = df[classificationCol].unique().tolist()
    N_CLASSES = len(distinctLabels)

    print("Start preprocessing step")
    # Numeric columns are recorded BEFORE preprocessing so that only the
    # originally-numeric features are passed to the scaler.
    numericColumn = df.select_dtypes(include=[np.number]).columns.tolist()
    print(classificationCol)

    # Preprocessing: map target from categorical to numeric and one-hot
    # encode the categorical columns.
    df = preprocessingDS(df)
    print("Dataset after one-hot encoding:")
    print(df.head(5))

    # Preprocessing: standard scaler.
    df = scaler(df, numericColumn)

    # Split into training and testing sets.
    sizesplit = split_dataset()
    pl = Plot(pathPlot)
    train, test = train_test_split(df, test_size=sizesplit)
    print("Train shape after split: ", train.shape)
    print("Test shape after split: ", test.shape)
    pl.plotStatistics(train, test, classificationCol)

    train_X, train_Y, test_X, test_Y = getXY(train, test, classificationCol)

    # Convert class vectors to binary class matrices.
    train_Y2 = np_utils.to_categorical(train_Y, N_CLASSES)

    callbacks_list = [
        callbacks.EarlyStopping(monitor='val_loss',
                                patience=4,
                                restore_best_weights=True)
    ]

    m = Models(N_CLASSES)
    VALIDATION_SPLIT = 0.1

    print('Model with autoencoder+softmax with fixed encoder weights')

    # Parameters for the autoencoder.
    p1 = {
        'first_layer': 60,
        'second_layer': 30,
        'third_layer': 10,
        'batch_size': 64,
        'epochs': 150,
        'optimizer': optimizers.Adam,
        'kernel_initializer': 'glorot_uniform',
        'losses': 'mse',
        'first_activation': 'tanh',
        'second_activation': 'tanh',
        'third_activation': 'tanh',
    }

    autoencoder = m.deepAutoEncoder(train_X, p1)
    autoencoder.summary()

    # Encoder used for feature extraction: same input as the autoencoder,
    # output taken at the 'encoder3' layer.
    encoder = Model(inputs=autoencoder.input,
                    outputs=autoencoder.get_layer('encoder3').output)
    encoder.summary()

    history2 = autoencoder.fit(train_X, train_X,
                               validation_split=VALIDATION_SPLIT,
                               batch_size=p1['batch_size'],
                               epochs=p1['epochs'],
                               shuffle=False,
                               callbacks=callbacks_list,
                               verbose=1)

    pl.printPlotLoss(history2, 'autoencoder')
    plot_model(autoencoder, to_file='autoencoder.png',
               show_shapes=True, show_layer_names=True)

    # The classifier is built on top of the trained `encoder` model.
    # NOTE(review): per the original comment the encoder weights are kept
    # fixed in the classifier; that is presumably done inside
    # MLP_WeightFixed - confirm there.

    # Parameters for the final model.
    p2 = {
        'batch_size': 256,
        'epochs': 100,
        'optimizer': optimizers.Adam,
        'kernel_initializer': 'glorot_uniform',
        'losses': 'binary_crossentropy',
        'first_activation': 'tanh',
        'second_activation': 'tanh',
        'third_activation': 'relu',
    }

    model = m.MLP_WeightFixed(encoder, train_X, p2)

    # Train with the final-model parameters (p2); the original passed the
    # autoencoder's p1 batch size / epochs here.
    history3 = model.fit(train_X, train_Y2,
                         validation_split=VALIDATION_SPLIT,
                         batch_size=p2['batch_size'],
                         epochs=p2['epochs'],
                         shuffle=False,
                         callbacks=callbacks_list,
                         verbose=1)

    pl.printPlotAccuracy(history3, 'finalModel1')
    # Plot the classifier's own loss (history3), not the autoencoder's.
    pl.printPlotLoss(history3, 'finalModel1')
    model.save('modelfixedW.h5')
    plot_model(model, to_file='classifier.png',
               show_shapes=True, show_layer_names=True)

    # Predict the test set results; the predicted class is the arg-max of
    # the model outputs.
    predictions = model.predict(test_X)
    y_pred = np.argmax(predictions, axis=1)

    cm = confusion_matrix(test_Y, y_pred)
    acc = accuracy_score(test_Y, y_pred, normalize=True)

    # Tick labels for the heatmap.
    # NOTE(review): assumes two classes with 0 = attack and 1 = normal -
    # confirm against preprocessingDS.
    LABELS = ["Attacks", "Normal"]
    print("Confusion matrix on test set")
    print(cm)
    print("Accuracy model on test set: " + str(acc))

    plt.figure(figsize=(12, 12))
    sns.heatmap(cm, xticklabels=LABELS, yticklabels=LABELS,
                annot=True, fmt="d")
    plt.title("Confusion matrix on test set")
    plt.ylabel('True class')
    plt.xlabel('Predicted class')
    plt.savefig(os.path.join(pathPlot, "confusion matrix"))
    plt.show()
    plt.close()
def run(self):
    """Run the MINDFUL pipeline on one training set and several test sets.

    Variant of the single-test-set pipeline in which ``test_X`` / ``test_Y``
    (as returned by ``prp.getXYCICIDS``) are iterated as collections of test
    sets. Steps:

    1. Load and preprocess the datasets through ``Datasets`` / ``prep``.
    2. Train (or load from ``pathModels``) one autoencoder on the training
       rows whose class column equals 1 ("normal") and one on the rows
       whose class column equals 0 ("anormal").
    3. Run both autoencoders over the training set and over every test set
       and combine originals and reconstructions via ``self.createImage``.
    4. Train (or load) the final classifier, validating on a stratified 20%
       hold-out of the training images.
    5. Print the confusion matrix of every test set and write the MEAN of
       the per-test-set metrics to ``ds._testpath + '_results.csv'``.

    Configuration keys read: ``VALIDATION_SPLIT``, ``N_CLASSES``,
    ``PREPROCESSING1``, ``LOAD_AUTOENCODER_NORMAL``, ``LOAD_AUTOENCODER_ADV``
    and ``LOAD_CNN``. A ``LOAD_*`` value of 0 means "train and save";
    any other value means "load the saved model from disk".
    """
    print('MINDFUL EXECUTION')

    dsConf = self.ds
    pathModels = dsConf.get('pathModels')
    pathPlot = dsConf.get('pathPlot')
    configuration = self.config

    VALIDATION_SPLIT = float(configuration.get('VALIDATION_SPLIT'))
    N_CLASSES = int(configuration.get('N_CLASSES'))
    pd.set_option('display.expand_frame_repr', False)

    # Datasets holds the dataset/model paths and the preprocessing phases.
    ds = Datasets(dsConf)
    ds.preprocessing1()
    train, test = ds.getTrain_TestCIDIS()
    prp = prep(train, test)

    # Preprocessing phase from original to numerical dataset; when disabled
    # the already-numeric datasets are fetched instead.
    PREPROCESSING1 = int(configuration.get('PREPROCESSING1'))
    if PREPROCESSING1 == 1:
        train, test = ds.preprocessing2(prp)
    else:
        train, test = ds.getNumericDatasets()

    # Split the training set by class label: 1 = "normal", 0 = "anormal".
    clsT, clsTest = prp.getCls()
    train_normal = train[train[clsT] == 1]
    train_anormal = train[train[clsT] == 0]

    train_XN, train_YN = prp.getXYTrain(train_normal)
    train_XA, train_YA = prp.getXYTrain(train_anormal)
    train_X, train_Y, test_X, test_Y = prp.getXYCICIDS(train, test)

    print('Train data shape normal', train_XN.shape)
    print('Train target shape normal', train_YN.shape)
    print('Train data shape anormal', train_XA.shape)
    print('Train target shape anormal', train_YA.shape)

    # Convert class vectors to binary class matrices for the softmax output.
    train_Y2 = np_utils.to_categorical(train_Y, N_CLASSES)
    print("Target train shape after", train_Y2.shape)
    # One-hot targets per test set (not used further below; kept so invalid
    # labels still fail here as they did originally).
    test_Y2 = [np_utils.to_categorical(t, N_CLASSES) for t in test_Y]
    print("Train all", train_X.shape)

    # Result table layout; getResult() is expected to return one value per
    # column, in this order.
    columns = ['TP', 'FN', 'FP', 'TN', 'OA', 'AA', 'P', 'R', 'F1',
               'FAR(FPR)', 'TPR']
    results = pd.DataFrame(columns=columns)

    callbacks_list = [
        callbacks.EarlyStopping(monitor='val_loss',
                                min_delta=0.0001,
                                patience=20,
                                restore_best_weights=True),
    ]

    # --- Autoencoder trained on "normal" samples only -------------------
    if int(configuration.get('LOAD_AUTOENCODER_NORMAL')) == 0:
        autoencoderN, p = ds.getAutoencoder_Normal(train_XN, N_CLASSES)
        # Encoder half (up to layer 'encoder3'); built only to print its
        # summary.
        encoderN = Model(inputs=autoencoderN.input,
                         outputs=autoencoderN.get_layer('encoder3').output)
        encoderN.summary()

        history = autoencoderN.fit(train_XN, train_XN,
                                   validation_split=VALIDATION_SPLIT,
                                   batch_size=p['batch_size'],
                                   epochs=p['epochs'],
                                   shuffle=True,
                                   callbacks=callbacks_list,
                                   verbose=1)
        autoencoderN.save(pathModels + 'autoencoderNormal.h5')
        Plot.printPlotLoss(history, 'autoencoderN', pathPlot)
    else:
        print("Load autoencoder Normal from disk")
        autoencoderN = load_model(pathModels + 'autoencoderNormal.h5')
        autoencoderN.summary()

    # Reconstructions by the "normal" autoencoder: training set, then each
    # test set.
    train_RE = autoencoderN.predict(train_X)
    test_RE = [autoencoderN.predict(t) for t in test_X]

    # --- Autoencoder trained on "anormal" (attack) samples only ---------
    if int(configuration.get('LOAD_AUTOENCODER_ADV')) == 0:
        autoencoderA, p = ds.getAutoencoder_Attacks(train_XA, N_CLASSES)
        # Encoder half; built only to print its summary.
        encoderA = Model(inputs=autoencoderA.input,
                         outputs=autoencoderA.get_layer('encoder3').output)
        encoderA.summary()

        history = autoencoderA.fit(train_XA, train_XA,
                                   validation_split=VALIDATION_SPLIT,
                                   batch_size=p['batch_size'],
                                   epochs=p['epochs'],
                                   shuffle=True,
                                   callbacks=callbacks_list,
                                   verbose=1)
        autoencoderA.save(pathModels + 'autoencoderAttacks.h5')
        Plot.printPlotLoss(history, 'autoencoderA', pathPlot)
    else:
        print("Load autoencoder Attacks from disk")
        autoencoderA = load_model(pathModels + 'autoencoderAttacks.h5')
        autoencoderA.summary()

    # Reconstructions by the "attack" autoencoder: training set, then each
    # test set.
    train_REA = autoencoderA.predict(train_X)
    test_REA = [autoencoderA.predict(t) for t in test_X]

    # Combine original samples with both reconstructions into the classifier
    # input. As in the original code, `input_shape` is the shape reported
    # for the last test set.
    train_X_image, _ = self.createImage(train_X, train_RE, train_REA)

    test_X_image = []
    for t, tN, tA in zip(test_X, test_RE, test_REA):
        test_XIm, input_shape = self.createImage(t, tN, tA)
        test_X_image.append(test_XIm)

    # --- Final classifier -----------------------------------------------
    if int(configuration.get('LOAD_CNN')) == 0:
        callbacks_list = [
            callbacks.EarlyStopping(monitor='val_loss',
                                    min_delta=0.0001,
                                    patience=10,
                                    restore_best_weights=True),
        ]

        model, p = ds.getMINDFUL(input_shape, N_CLASSES)

        # Explicit stratified hold-out instead of Keras' validation_split.
        XTraining, XValidation, YTraining, YValidation = train_test_split(
            train_X_image, train_Y2, stratify=train_Y2, test_size=0.2)

        history3 = model.fit(XTraining, YTraining,
                             validation_data=(XValidation, YValidation),
                             batch_size=p['batch_size'],
                             epochs=p['epochs'],
                             shuffle=True,
                             callbacks=callbacks_list,
                             verbose=1)

        Plot.printPlotAccuracy(history3, 'finalModel1', pathPlot)
        Plot.printPlotLoss(history3, 'finalModel1', pathPlot)
        model.save(pathModels + 'MINDFUL.h5')
    else:
        print("Load softmax from disk")
        model = load_model(pathModels + 'MINDFUL.h5')
        model.summary()

    # --- Evaluation -------------------------------------------------------
    predictionsL = model.predict(train_X_image)
    y_pred = np.argmax(predictionsL, axis=1)
    cmC = confusion_matrix(train_Y, y_pred)
    print('Prediction Training')
    print(cmC)

    # One metrics row per test set.
    r_list = []
    for t, Y in zip(test_X_image, test_Y):
        predictionsC = model.predict(t)
        print('Softmax on test set')
        y_pred = np.argmax(predictionsC, axis=1)
        cm = confusion_matrix(Y, y_pred)
        print(cm)
        r_list.append(tuple(getResult(cm, N_CLASSES)))

    # Report the mean of every metric across the test sets. The original
    # code computed this mean but then wrote only the last test set's row.
    dfResults_temp = pd.DataFrame(r_list, columns=columns)
    dfResults = (dfResults_temp.mean(axis=0)
                 .to_frame().T
                 .reindex(columns=columns))
    print(dfResults)

    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
    # replacement.
    results = pd.concat([results, dfResults], ignore_index=True)
    results.to_csv(ds._testpath + '_results.csv', index=False)