def main():
    """Run the full pipeline: load, preprocess, train, and evaluate.

    Steps, in order:
      1. Ask the user for the dataset folder, dataset file name, and the
         folder where plots are saved, then load the CSV.
      2. Preprocess the data frame (``preprocessingDS``) and scale the
         columns that were numeric in the raw data (``scaler``).
      3. Split into train/test sets and plot statistics of the split.
      4. Train a deep autoencoder on the training features.
      5. Build a classifier on top of the trained encoder
         (``Models.MLP_WeightFixed``), train it, and save it to
         ``modelfixedW.h5``.
      6. Evaluate on the test set: print and plot the confusion matrix and
         print the accuracy.

    The function takes no arguments and returns nothing; all results are
    printed, plotted, or written to disk.
    """
    pd.set_option('display.expand_frame_repr', False)

    pathFolder = input("Insert dataset path folder (tips: dataset):")
    # Fixed prompt: this value is the file name joined onto the folder above.
    pathDataset = input("Insert dataset file name (tips: KDD99.csv):")
    pathPlot = input("Insert plot path folder (tips: plot):")

    df = pd.read_csv(os.path.join(pathFolder, pathDataset), delimiter=",")
    print("Dataset shape: ", df.shape)
    print("Dataset before preprocessing: ")
    print(df.head(5))

    # The last column is the classification target; the number of distinct
    # values in it (before preprocessing) is the number of classes.
    classificationCol = df.columns[-1]
    distinctLabels = df[classificationCol].unique().tolist()
    N_CLASSES = len(distinctLabels)

    print("Start preprocessing step")
    # Numeric columns are recorded BEFORE preprocessing so that only the
    # originally numeric columns are handed to the scaler afterwards.
    numericColumn = df.select_dtypes(include=[np.number]).columns.tolist()
    print(classificationCol)

    # Preprocessing: map target from categorical to numeric and one-hot
    # encode the categorical columns.
    df = preprocessingDS(df)
    print("Dataset after one-hot encoding:")
    print(df.head(5))

    # Preprocessing: standard scaler on the numeric columns.
    df = scaler(df, numericColumn)

    # Split into training and testing sets.
    sizesplit = split_dataset()
    pl = Plot(pathPlot)
    train, test = train_test_split(df, test_size=sizesplit)
    print("Train shape after split: ", train.shape)
    print("Test shape after split: ", test.shape)
    pl.plotStatistics(train, test, classificationCol)

    train_X, train_Y, test_X, test_Y = getXY(train, test, classificationCol)

    # Convert class vectors to binary class matrices.
    train_Y2 = np_utils.to_categorical(train_Y, N_CLASSES)

    callbacks_list = [
        callbacks.EarlyStopping(monitor='val_loss',
                                patience=4,
                                restore_best_weights=True),
    ]

    m = Models(N_CLASSES)
    VALIDATION_SPLIT = 0.1

    print('Model with autoencoder+softmax with fixed encoder weights')

    # Autoencoder parameters.
    p1 = {
        'first_layer': 60,
        'second_layer': 30,
        'third_layer': 10,
        'batch_size': 64,
        'epochs': 150,
        'optimizer': optimizers.Adam,
        'kernel_initializer': 'glorot_uniform',
        'losses': 'mse',
        'first_activation': 'tanh',
        'second_activation': 'tanh',
        'third_activation': 'tanh',
    }

    autoencoder = m.deepAutoEncoder(train_X, p1)
    autoencoder.summary()

    # Encoder sub-model used for feature extraction: same input as the
    # autoencoder, output taken at the layer named 'encoder3'.
    encoder = Model(inputs=autoencoder.input,
                    outputs=autoencoder.get_layer('encoder3').output)
    encoder.summary()

    # The autoencoder is trained to reconstruct its own input.
    history2 = autoencoder.fit(train_X,
                               train_X,
                               validation_split=VALIDATION_SPLIT,
                               batch_size=p1['batch_size'],
                               epochs=p1['epochs'],
                               shuffle=False,
                               callbacks=callbacks_list,
                               verbose=1)
    pl.printPlotLoss(history2, 'autoencoder')
    plot_model(autoencoder,
               to_file='autoencoder.png',
               show_shapes=True,
               show_layer_names=True)

    # The trained encoder is passed directly to the classifier builder below;
    # no separate copy of the autoencoder weights is needed here.

    # Parameters for the final (classifier) model.
    p2 = {
        'batch_size': 256,
        'epochs': 100,
        'optimizer': optimizers.Adam,
        'kernel_initializer': 'glorot_uniform',
        'losses': 'binary_crossentropy',
        'first_activation': 'tanh',
        'second_activation': 'tanh',
        'third_activation': 'relu',
    }

    model = m.MLP_WeightFixed(encoder, train_X, p2)

    # Fixed: train the classifier with its own hyper-parameters (p2), not the
    # autoencoder's (p1).
    history3 = model.fit(train_X,
                         train_Y2,
                         validation_split=VALIDATION_SPLIT,
                         batch_size=p2['batch_size'],
                         epochs=p2['epochs'],
                         shuffle=False,
                         callbacks=callbacks_list,
                         verbose=1)

    pl.printPlotAccuracy(history3, 'finalModel1')
    # Fixed: plot the classifier's loss history (was the autoencoder's).
    pl.printPlotLoss(history3, 'finalModel1')
    model.save('modelfixedW.h5')

    plot_model(model,
               to_file='classifier.png',
               show_shapes=True,
               show_layer_names=True)

    # Predict on the test set; the predicted class is the arg-max over the
    # model's per-class outputs.
    predictions = model.predict(test_X)
    y_pred = np.argmax(predictions, axis=1)

    cm = confusion_matrix(test_Y, y_pred)
    acc = accuracy_score(test_Y, y_pred, normalize=True)

    # NOTE(review): tick labels are hard-coded for two classes and assume
    # index 0 = "Attacks", index 1 = "Normal" -- confirm against the target
    # mapping done in preprocessingDS.
    LABELS = ["Attacks", "Normal"]

    print("Confusion matrix on test set")
    print(cm)
    print("Accuracy model on test set: " + str(acc))

    plt.figure(figsize=(12, 12))
    sns.heatmap(cm,
                xticklabels=LABELS,
                yticklabels=LABELS,
                annot=True,
                fmt="d")
    plt.title("Confusion matrix on test set")
    plt.ylabel('True class')
    plt.xlabel('Predicted class')
    plt.savefig(os.path.join(pathPlot, "confusion matrix"))
    plt.show()
    plt.close()