    # loaded_model = load("model_SVC.joblib")
    # SVM.test_svm_classifier(loaded_model, val_data, val_labels)
    loaded_model = load_model("models/best_model_DNN_Adam.h5")
    NN.test_neural_network(loaded_model, val_data, val_labels)


if __name__ == "__main__":
    total_features = 545333  # total number of unique features
    testing_set_size = 1500  # set size used to create the random test set
    malware_ratio = 0.3  # malware ratio within the test set

    print("Creating data-labels...")
    onehot.create_list_of_apps()  # function from set_one_encoding.py

    # initialize sklearn models
    GNB = models.GaussianNaiveBayes()
    MNB = models.MultinomialNaiveBayes()
    CNB = models.ComplementNaiveBayes()
    BNB = models.BernoulliNaiveBayes()
    DT = models.DecisionTree()
    RF = models.RandomForest()
    KNN = models.KNearestNeighbors()
    LR = models.LogRegression()
    SVM = models.SupportVectorMachine()

    val_runs = 8
    # evaluate_models(val_runs)
    evaluate_on_test_set()
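# --- Illustrative sketch (not part of the original listing) ------------------
# onehot.create_list_of_apps() is assumed to produce binary feature vectors:
# one row per app, one column per unique feature (545333 in total), set to 1
# when the app contains that feature. The helper below shows that idea in
# minimal form; build_feature_matrix and its arguments are hypothetical names,
# not the actual API of set_one_encoding.py.
import numpy as np


def build_feature_matrix(app_features, feature_index):
    """app_features: list of per-app feature-string lists.
    feature_index: dict mapping every unique feature string to a column index."""
    matrix = np.zeros((len(app_features), len(feature_index)), dtype=np.int8)
    for row, features in enumerate(app_features):
        for feature in features:
            col = feature_index.get(feature)
            if col is not None:
                matrix[row, col] = 1
    return matrix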
def train_external_detector():
    train_data, train_labels, test_data, test_labels = create_sets()
    trained_model = tf.keras.models.load_model('best_model_Adam.h5')

    # baseline performance of the trained model on the original (clean) data
    predict_original = trained_model.predict(train_data)
    confusion = confusion_matrix(train_labels, np.argmax(predict_original, axis=1))
    TP = confusion[1, 1]
    TN = confusion[0, 0]
    FP = confusion[0, 1]
    FN = confusion[1, 0]
    FNR_original = FN / float(FN + TP) * 100
    FPR = FP / float(FP + TN) * 100
    accuracy = ((TP + TN) / float(TP + TN + FP + FN)) * 100
    print(confusion)
    print("Original FP:", FP, "- FN:", FN, "- TP:", TP, "- TN", TN)
    print("Original Accuracy:", accuracy, "- FPR:", FPR, "- FNR:", FNR_original)

    average_changes = 0
    amount_malwares = 0
    averageChanges = 0

    # the numpy array will be filled dynamically
    adversarial_data = np.zeros((0, 3880), dtype=float)

    # craft an adversarial example for every malware sample in the training set
    for i in range(len(train_data)):
        if train_labels[i] == 1:
            x = train_data[i:i + 1]
            # print("x: ", x)
            # print(x.shape)
            try:
                adv_x, changes = craft_adversarial_samples(x, 0, trained_model, 1)
                # print(adv_x)
                # append the adversarial sample to the numpy array
                adversarial_data = np.concatenate((adversarial_data, adv_x))
                if changes >= 0:
                    average_changes += changes
                    amount_malwares += 1
            except NameError:
                pass
            except ValueError:
                pass

    if amount_malwares > 0:
        averageChanges += (average_changes / float(amount_malwares))

    # re-create the original sets and evaluate the trained model again
    train_data, train_labels, test_data, test_labels = create_sets()
    predictions = trained_model.predict(train_data)
    confusion = confusion_matrix(train_labels, np.argmax(predictions, axis=1))
    print(confusion)
    TP = confusion[1, 1]
    TN = confusion[0, 0]
    FP = confusion[0, 1]
    FN = confusion[1, 0]
    FNR = FN / float(FN + TP) * 100
    FPR = FP / float(FP + TN) * 100
    accuracy = ((TP + TN) / float(TP + TN + FP + FN)) * 100
    print("Adversarial FP:", FP, "- FN:", FN, "- TP:", TP, "- TN", TN)
    print("Adversarial Accuracy:", accuracy, "- FPR:", FPR, "- FNR:", FNR)
    print("Misclassification Rate:", FNR - FNR_original)
    print("Distortion:", averageChanges)

    # evaluate the trained model on the crafted adversarial samples only
    predictions = trained_model.predict(adversarial_data)
    adversarial_labels = np.ones((len(adversarial_data), ), dtype=int)
    confusion = confusion_matrix(adversarial_labels, np.argmax(predictions, axis=1))
    print(confusion)
    TP = confusion[1, 1]
    TN = confusion[0, 0]
    FP = confusion[0, 1]
    FN = confusion[1, 0]
    FNR = FN / float(FN + TP) * 100
    FPR = FP / float(FP + TN) * 100
    accuracy = ((TP + TN) / float(TP + TN + FP + FN)) * 100
    print("Adversarial FP:", FP, "- FN:", FN, "- TP:", TP, "- TN", TN)
    print("Adversarial Accuracy:", accuracy, "- FPR:", FPR, "- FNR:", FNR)
    print("Misclassification Rate:", FNR - FNR_original)
    print("Distortion:", averageChanges)
    print(changes_dict)

    del predict_original, FNR_original, predictions, confusion, TP, TN, FP, FN, FNR, FPR, accuracy

    # concatenate the legitimate data with the produced adversarial input
    final_train_data = np.concatenate((train_data, adversarial_data))
    print("final train data shape:", final_train_data.shape)
    train_labels = np.zeros((len(train_labels), ), dtype=int)  # fill with 0 (the original class)
    print("train labels shape:", train_labels.shape)
    adversarial_labels = np.ones((len(adversarial_data), ), dtype=int)  # fill with 1 (the adversarial class)
    print("adversarial labels:", adversarial_labels.shape)
    final_train_labels = np.concatenate((train_labels, adversarial_labels))
    print("final labels shape:", final_train_labels.shape)
    print("Unique classes:", np.unique(final_train_labels))
    del train_data, train_labels, adversarial_data, adversarial_labels

    # shuffle the set (sklearn's shuffle returns copies, so the result is reassigned)
    final_train_data, final_train_labels = shuffle(final_train_data, final_train_labels, random_state=123)
    # train on the augmented dataset (adversarial examples belong to class '1')
    model = generate_neural_network(total_features, [200, 200], 0.2, 0.001,
                                    "glorot_uniform", "zeros", "relu", 2)
    train_neural_network(model,
                         epochs=30,
                         batch_size=150,
                         features=final_train_data,
                         labels=final_train_labels,
                         verbose=2,
                         validation=True,
                         val_data=final_train_data,
                         val_labels=final_train_labels,
                         callbacks=True,
                         path=dir_path + "logs/fit/",
                         model_name="external_detector_2")

    # initialize sklearn models
    GNB = models.GaussianNaiveBayes()
    MNB = models.MultinomialNaiveBayes()
    CNB = models.ComplementNaiveBayes()
    BNB = models.BernoulliNaiveBayes()
    DT = models.DecisionTree()
    RF = models.RandomForest()
    KNN = models.KNearestNeighbors()
    LR = models.LogRegression()
    SVM = models.SupportVectorMachine()

    model = GNB.train_gaussian_naive_bayes_classifier(
        final_train_data, final_train_labels)  # train Gaussian Naive Bayes
    score_GNB = GNB.evaluate_gaussian_naive_bayes_classifier(
        model, final_train_data, final_train_labels)  # test performance
    print("GNB", score_GNB)

    model = MNB.train_multi_naive_bayes_classifier(final_train_data,
                                                   final_train_labels)
    score_MNB = MNB.evaluate_multi_naive_bayes_classifier(
        model, final_train_data, final_train_labels)
    print("MNB", score_MNB)

    model = CNB.train_complement_naive_bayes_classifier(
        final_train_data, final_train_labels)
    score_CNB = CNB.evaluate_complement_naive_bayes_classifier(
        model, final_train_data, final_train_labels)
    print("CNB", score_CNB)

    model = BNB.train_bernoulli_naive_bayes_classifier(final_train_data,
                                                       final_train_labels)
    # note: BNB is evaluated on the held-out test set here, unlike the other models
    score_BNB = BNB.evaluate_bernoulli_naive_bayes_classifier(
        model, test_data, test_labels)
    print("BNB", score_BNB)

    model = DT.train_decision_tree_classifier(
        final_train_data, final_train_labels)  # train Decision Tree classifier
    score_dt = DT.evaluate_decision_tree_classifier(model, final_train_data,
                                                    final_train_labels)
    print("DT:", score_dt)

    model = LR.train_logistic_regression_classifier(
        final_train_data, final_train_labels)  # train Logistic Regression
    score_lr = LR.evaluate_logistic_regression_classifier(
        model, final_train_data, final_train_labels)
    print("LR", score_lr)

    model = KNN.train_knn_classifier(
        final_train_data, final_train_labels)  # train k-Nearest Neighbors classifier
    score_knn = KNN.evaluate_knn_classifier(model, final_train_data,
                                            final_train_labels)
    print("KNN", score_knn)

    model = SVM.train_svm_classifier(
        final_train_data, final_train_labels)  # train Support Vector Machine
    score_svm = SVM.evaluate_svm_classifier(model, final_train_data,
                                            final_train_labels)
    print("SVM", score_svm)

    model = RF.train_random_forest_classifier(
        final_train_data, final_train_labels)  # train Random Forest
    score_rf = RF.evaluate_random_forest_classifier(model, final_train_data,
                                                    final_train_labels)
    print("RF:", score_rf)
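# --- Illustrative sketch (not part of the original listing) ------------------
# The TP/TN/FP/FN -> accuracy/FPR/FNR block is repeated three times inside
# train_external_detector(). A small helper such as the hypothetical
# confusion_metrics() below computes the same percentages from a 2x2
# sklearn-style confusion matrix.
def confusion_metrics(confusion):
    """Return (accuracy, FPR, FNR) in percent from a 2x2 confusion matrix."""
    TN, FP = confusion[0, 0], confusion[0, 1]
    FN, TP = confusion[1, 0], confusion[1, 1]
    accuracy = (TP + TN) / float(TP + TN + FP + FN) * 100
    FPR = FP / float(FP + TN) * 100
    FNR = FN / float(FN + TP) * 100
    return accuracy, FPR, FNR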