Example #1
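The listing below does not show its imports; the following reconstruction is inferred from the calls used in main() and is an assumption, not part of the original example.

# Assumed imports, inferred from the API calls below (not shown in the original listing).
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
from keras import callbacks, optimizers
from keras.models import Model
from keras.utils import np_utils, plot_model
# preprocessingDS, scaler, split_dataset, getXY, Plot and Models are assumed to be
# project-specific helpers defined in the repository's own modules.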
def main():
    pd.set_option('display.expand_frame_repr', False)
    pathFolder = input("Insert dataset folder path (tip: dataset): ")
    pathDataset = input("Insert dataset file name (tip: KDD99.csv): ")
    pathPlot = input("Insert plot folder path (tip: plot): ")
    df = pd.read_csv(os.path.join(pathFolder, pathDataset), delimiter=",")
    print("Dataset shape: ", df.shape)
    print("Dataset before preprocessing: ")
    print(df.head(5))

    # Show the distinct classification targets (class labels)
    distinctLabels = df[df.columns[-1]].unique().tolist()
    N_CLASSES = len(distinctLabels)

    print("Start preprocessing step")
    # Retrieve all numerical columns for the standard scaler
    numericColumn = df.select_dtypes(include=[np.number]).columns.tolist()
    classificationCol = df.columns[-1]  # name of the target column
    print(classificationCol)

    # Preprocessing: map the target from categorical to numeric and one-hot encode the categorical columns
    df = preprocessingDS(df)
    print("Dataset after one-hot encoding:")
    print(df.head(5))

    # Preprocessing: standard scaler
    df = scaler(df, numericColumn)

    # Split the dataset into training and test sets
    sizesplit = split_dataset()
    pl = Plot(pathPlot)

    train, test = train_test_split(df, test_size=sizesplit)

    print("Train shape after split: ", train.shape)
    print("Test shape after split: ", test.shape)
    pl.plotStatistics(train, test, classificationCol)

    train_X, train_Y, test_X, test_Y = getXY(train, test, classificationCol)
    # convert class vectors to binary class matrices
    train_Y2 = np_utils.to_categorical(train_Y, N_CLASSES)

    callbacks_list = [
        callbacks.EarlyStopping(monitor='val_loss',
                                patience=4,
                                restore_best_weights=True)
    ]

    m = Models(N_CLASSES)

    VALIDATION_SPLIT = 0.1
    print('Model with autoencoder+softmax with fixed encoder weights')
    # parameters for the autoencoder
    p1 = {
        'first_layer': 60,
        'second_layer': 30,
        'third_layer': 10,
        'batch_size': 64,
        'epochs': 150,
        'optimizer': optimizers.Adam,
        'kernel_initializer': 'glorot_uniform',
        'losses': 'mse',
        'first_activation': 'tanh',
        'second_activation': 'tanh',
        'third_activation': 'tanh'
    }

    autoencoder = m.deepAutoEncoder(train_X, p1)
    autoencoder.summary()

    #get encoder for feature extraction
    encoder = Model(inputs=autoencoder.input,
                    outputs=autoencoder.get_layer('encoder3').output)
    encoder.summary()

    history2 = autoencoder.fit(train_X,
                               train_X,
                               validation_split=VALIDATION_SPLIT,
                               batch_size=p1['batch_size'],
                               epochs=p1['epochs'],
                               shuffle=False,
                               callbacks=callbacks_list,
                               verbose=1)

    pl.printPlotLoss(history2, 'autoencoder')
    plot_model(autoencoder,
               to_file='autoencoder.png',
               show_shapes=True,
               show_layer_names=True)
    '''
    Save the weights from the autoencoder model.
    They are kept fixed (frozen) in the classifier model.
    '''
    # Collect the weights layer by layer; note that the classifier built below
    # reuses the frozen encoder directly rather than this list.
    weights = []
    for layer in autoencoder.layers:
        weights.append(layer.get_weights())

    # parameters for final model
    p2 = {
        'batch_size': 256,
        'epochs': 100,
        'optimizer': optimizers.Adam,
        'kernel_initializer': 'glorot_uniform',
        'losses': 'binary_crossentropy',
        'first_activation': 'tanh',
        'second_activation': 'tanh',
        'third_activation': 'relu'
    }

    model = m.MLP_WeightFixed(encoder, train_X, p2)

    history3 = model.fit(train_X,
                         train_Y2,
                         validation_split=VALIDATION_SPLIT,
                         batch_size=p2['batch_size'],
                         epochs=p2['epochs'],
                         shuffle=False,
                         callbacks=callbacks_list,
                         verbose=1)

    pl.printPlotAccuracy(history3, 'finalModel1')
    pl.printPlotLoss(history3, 'finalModel1')
    model.save('modelfixedW.h5')
    plot_model(model,
               to_file='classifier.png',
               show_shapes=True,
               show_layer_names=True)

    predictions = model.predict(test_X)

    # Predicting the Test set results
    y_pred = np.argmax(predictions, axis=1)
    cm = confusion_matrix(test_Y, y_pred)
    acc = accuracy_score(test_Y, y_pred, normalize=True)
    LABELS = ["Attacks", "Normal"]
    print("Confusion matrix on test set")
    print(cm)
    print("Accuracy model on test set: " + str(acc))
    plt.figure(figsize=(12, 12))
    sns.heatmap(cm,
                xticklabels=LABELS,
                yticklabels=LABELS,
                annot=True,
                fmt="d")
    plt.title("Confusion matrix on test set")
    plt.ylabel('True class')
    plt.xlabel('Predicted class')
    plt.savefig(os.path.join(pathPlot, "confusion_matrix.png"))
    plt.show()
    plt.close()
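
A minimal entry-point guard, assuming this example is meant to be run as a standalone script:

# Assumed entry point (not in the original listing) so the example can be run directly.
if __name__ == '__main__':
    main()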