def runScenario(data_file, test_size):
    """Train and compare a suite of classifiers on one CSV dataset.

    Fits ten sklearn classifiers plus an LSTM and a CNN, logs each model's
    test accuracy, persists every fitted model, records precision/recall
    for a final summary plot.

    :param data_file: dataset base name; '.csv' is appended here
    :param test_size: fraction of samples held out for testing
                      (passed straight to train_test_split)
    """
    legends = []
    precisionList = []
    recallList = []

    # Load the data; all columns but the last are features, the last
    # column is the class label (column count comes from fx.header).
    data_file = '{}.csv'.format(data_file)
    df = pd.read_csv(data_file, delimiter=',')
    n_feature_cols = len(fx.header) - 1
    features = df.iloc[:, list(range(n_feature_cols))].values
    target = df.iloc[:, [n_feature_cols]].values

    X_train_default, X_test_default, y_train_default, y_test_default = \
        train_test_split(features, target, random_state=0,
                         test_size=test_size)
    # Working copies; the *_default arrays are kept pristine for the
    # neural-network reshapes below.
    X_train = X_train_default
    X_test = X_test_default
    y_train = y_train_default
    y_test = y_test_default
    print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

    def run_classifier(model, tag, short_name, long_name,
                       show_predictions=False):
        # Fit one sklearn classifier, log its accuracy, persist it and
        # record its precision/recall curve for the summary plot.
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        if show_predictions:
            print(y_test, y_pred)
        lg.success('{}: {:.4f}'.format(long_name,
                                       model.score(X_test, y_test)))
        fx.saveLinearModel(tag, model)
        p, r, l = fx.plotNPS(short_name, y_test, y_pred, test_size)
        precisionList.append(p)
        recallList.append(r)
        legends.append(l)

    run_classifier(KNeighborsClassifier(n_neighbors=1),
                   'knn', 'KNN', 'KNN', show_predictions=True)
    run_classifier(LogisticRegression(),
                   'lr', 'LR', 'Logistic Regression')
    run_classifier(LinearSVC(), 'lsvc', 'LSVC', 'LinearSVC')
    run_classifier(SVC(), 'svc', 'SVC', 'SVC')
    run_classifier(DecisionTreeClassifier(random_state=0, max_depth=7),
                   'dt', 'DT', 'Decision Tree')
    run_classifier(RandomForestClassifier(n_estimators=100, random_state=0),
                   'rf', 'RF', 'Random Forest')
    run_classifier(GradientBoostingClassifier(random_state=0),
                   'gb', 'GB', 'Gradient Boosting')
    run_classifier(GaussianNB(), 'gnb', 'GNB', 'Gaussian NB')
    run_classifier(BernoulliNB(), 'bnb', 'BNB', 'Bernoulli NB')
    run_classifier(MultinomialNB(), 'mnb', 'MNB', 'Multinomial NB')

    # ----- LSTM recurrent network -----
    input_data_shape = X_train.shape[1]
    num_features = y_train.shape[1]
    batch_size = 64
    epochs = 100
    # The RNN expects (samples, timesteps, features); use one timestep.
    X_train = np.reshape(
        X_train_default,
        (X_train_default.shape[0], 1, X_train_default.shape[1]))
    X_test = np.reshape(
        X_test_default,
        (X_test_default.shape[0], 1, X_test_default.shape[1]))
    lg.warning('\n\nNeural Network')
    lg.success("Num features: {}".format(num_features))
    model = rmd.lstm(input_data_shape, num_features)
    try:
        plot_model(model, to_file='./rnn_lstm.eps')
    except Exception:
        # EPS export can fail (e.g. missing graphviz backend);
        # the PNG below is the one we rely on.
        pass
    plot_model(model, to_file='./rnn_lstm.png')
    model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs)
    res = model.evaluate(X_test, y_test)
    # res[3] is assumed to be the accuracy metric of rmd.lstm's
    # compiled metrics — TODO confirm against rmd.lstm().
    lg.success('LSTM Accuracy: {:.4f}'.format(res[3]))
    y_pred = np.around(model.predict(X_test))
    y_pred = np.array([int(i) for i in y_pred])
    print(y_test, y_pred)
    model.save('lstm_model.h5')
    lg.success('[+] Model saved')
    p, r, l = fx.plotNPS('LSTM', y_test, y_pred, test_size)
    precisionList.append(p)
    recallList.append(r)
    legends.append(l)

    # ----- Convolutional neural network -----
    lg.warning('\n\nCNN')
    # The CNN expects 4-D input: (samples, features, 1, 1).
    X_train = np.reshape(
        X_train_default,
        (X_train_default.shape[0], X_train_default.shape[1], 1, 1))
    y_train = np.reshape(
        y_train_default,
        (y_train_default.shape[0], y_train_default.shape[1], 1, 1))
    X_test = np.reshape(
        X_test_default,
        (X_test_default.shape[0], X_test_default.shape[1], 1, 1))
    y_test = np.reshape(
        y_test_default,
        (y_test_default.shape[0], y_test_default.shape[1], 1, 1))
    print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
    model = cmd.default(X_train.shape[1:])
    try:
        plot_model(model, to_file='./cnn.eps')
    except Exception:
        pass
    plot_model(model, to_file='./cnn.png')
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size)
    res = model.evaluate(X_test, y_test)
    lg.success('CNN Accuracy: {:.4f}'.format(res[1]))
    y_pred = np.around(model.predict(X_test))
    y_pred = np.array([int(i) for i in y_pred])
    print(y_test_default, y_pred)
    # BUG FIX: this previously saved to 'lstm_model.h5', silently
    # overwriting the LSTM model persisted above.
    model.save('cnn_model.h5')
    lg.success('[+] Model saved')
    p, r, l = fx.plotNPS('CNN', y_test_default, y_pred, test_size)
    precisionList.append(p)
    recallList.append(r)
    legends.append(l)

    # Final precision/recall summary across all models.
    fx.plotSummary(precisionList, recallList, legends, test_size)
# Output layer: three-way softmax over the target classes.
# (Legacy Keras 1.x argument names: output_dim / init / nb_epoch.)
classifier.add(Dense(output_dim=3, init='uniform', activation='softmax'))

# Compile the ANN.
classifier.compile(optimizer='adam',
                   loss='categorical_crossentropy',
                   metrics=['accuracy'])

# Fit on the training set; .toarray() densifies the sparse feature matrix.
classifier.fit(X_train.toarray(), Y_train,
               batch_size=20, nb_epoch=100, verbose=1)

# ----- Classifier evaluation -----
score = classifier.evaluate(X_test.toarray(), Y_test,
                            batch_size=20, verbose=1)
print('Test score:', score[0])
print('accuracy:', score[1])
# 10-fold cross-validation score for the logistic-regression model.
cv_scores = cross_val_score(LR, all_features2, all_classes, cv=10)
print("cv_Score by Logistic Regression with sigmoid kernals is:",
      cv_scores.mean())

# ----- Keras neural network -----
from keras.wrappers.scikit_learn import KerasClassifier
from keras.models import Sequential
from keras.layers import Dense, Activation

# 4-input MLP: 8 -> 4 -> 1 (sigmoid) for binary classification.
model = Sequential()
model.add(Dense(8, input_dim=4,
                kernel_initializer='normal', activation='relu'))
model.add(Dense(4, kernel_initializer='normal', activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

history = model.fit(X_train, y_train,
                    batch_size=100,
                    epochs=30,
                    verbose=2,
                    validation_data=(X_test, y_test))

# ----- Test-set error -----
score = model.evaluate(X_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
# Encode string labels as one-hot vectors. Fit the encoders on the
# TRAINING labels only, then reuse them (transform, not fit_transform)
# on the validation labels so both splits share a single encoding.
# BUG FIX: the validation labels were previously re-fitted, which can
# silently produce a different label-to-index mapping.
le = LabelEncoder()
ohe = OneHotEncoder()
Y_train_ec = le.fit_transform(Y_train)
Y_train_ec = ohe.fit_transform(Y_train_ec.reshape(-1, 1))
Y_validation_ec = le.transform(Y_validation)
Y_validation_ec = ohe.transform(Y_validation_ec.reshape(-1, 1))

# Model: INPUT(4) => FC(10) => RELU => FC(10) => RELU => FC(3) => SOFTMAX
# (legacy Keras 1.x argument names: init / nb_epoch)
model = Sequential()
model.add(Dense(10, input_dim=4, init="uniform", activation="relu"))
model.add(Dense(10, init="uniform", activation="relu"))
model.add(Dense(3, init="uniform", activation="softmax"))
# BUG FIX: categorical_crossentropy is the correct loss for a 3-way
# softmax with one-hot targets; binary_crossentropy reports a
# misleading per-output accuracy here.
model.compile(loss="categorical_crossentropy",
              optimizer="adam",
              metrics=["accuracy"])
model.fit(X_train, Y_train_ec, nb_epoch=150, batch_size=10)

scores = model.evaluate(X_train, Y_train_ec)
print('Keras accuracy: %.2f' % (scores[1] * 100))
model.evaluate(X_train, Y_train_ec)
model.evaluate(X_validation, Y_validation_ec)
# Fit the network on the count vectors.
# BUG FIX: the original used Python 2 print statements, which are
# syntax errors under the Python 3 style used elsewhere in this file;
# converted to print() calls with identical output.
model.fit(train_counts, train_labels,
          batch_size=100,
          epochs=200,
          callbacks=callbacks,
          verbose=1,
          validation_data=(test_counts, test_labels))

###############################
####Testing the model##########
###############################
print("TRAIN eval:", model.evaluate(train_counts, train_labels))
print("TEST eval:", model.evaluate(test_counts, test_labels))
print("##########################")

# Threshold the predicted probabilities at 0.5 for hard labels.
pred_probas = model.predict(test_counts)
print(np.shape(pred_probas), type(pred_probas))
preds = pred_probas > 0.5
print(pred_probas[:10], preds[:10])
print("confusion_matrix\n",
      confusion_matrix(test_labels, np.array(preds, dtype=int)))
print("ROC area under the curve \n",
      roc_auc_score(test_labels, pred_probas))
class Classifier():
    """Classifier operating on features compressed by an autoencoder's encoder.

    Supports several sklearn models plus a small dense neural network
    ("DNN"); inputs are always encoded before fitting or predicting.
    """

    def __init__(self, model_name, autoencoder):
        """
        :param model_name: one of "SVM", "LR", "RF", "KNN", "DNN"
        :param autoencoder: autoencoder whose encoder is used to
                            compress the inputs
        """
        self.modelName = model_name
        self.model = None
        # Classical models dispatch through a table; unknown names
        # leave self.model as None (matching the original behaviour).
        sklearn_models = {
            "SVM": SVC,
            "LR": LogisticRegression,
            "RF": RandomForestClassifier,
            "KNN": KNeighborsClassifier,
        }
        if model_name in sklearn_models:
            self.model = sklearn_models[model_name]()
        if model_name == "DNN":
            # 256-dim encoded input -> 128 -> 64 -> 32 -> 4-way softmax.
            self.model = K.models.Sequential([
                K.layers.Dense(128, input_shape=(256, )),
                K.layers.Activation('relu'),
                K.layers.Dense(64),
                K.layers.Activation('relu'),
                K.layers.Dense(32),
                K.layers.Activation('relu'),
                K.layers.Dense(4),
                K.layers.Activation('softmax')
            ])
            self.model.compile(optimizer='adam',
                               loss='sparse_categorical_crossentropy',
                               metrics=['accuracy'])
        self.encoder = autoencoder.getEncoder()

    def saveArchitecture(self, path):
        """Print the model summary and save an architecture diagram to *path*."""
        print(self.model.summary())
        K.utils.plot_model(self.model, to_file=path, show_shapes=True)

    def train(self, X_train, y_train, X_test, y_test):
        """Encode the inputs, fit the model, and report train/validation accuracy.

        :param X_train: training features
        :param y_train: training labels
        :param X_test: validation features
        :param y_test: validation labels
        """
        # Compress both splits with the autoencoder's encoder first.
        X_train = self.encoder.predict(X_train)
        X_test = self.encoder.predict(X_test)
        print("Training", self.modelName)
        if self.modelName == "DNN":
            self.model.fit(X_train, y_train,
                           validation_data=(X_test, y_test),
                           epochs=10,
                           batch_size=32)
            print("Train Accuracy of DNN =",
                  self.model.evaluate(X_train, y_train)[1])
            print("Validation Accuracy of DNN =",
                  self.model.evaluate(X_test, y_test)[1])
        else:
            self.model.fit(X_train, y_train)
            print("Train Accuracy of", self.modelName, "=",
                  self.model.score(X_train, y_train))
            print("Validation Accuracy", self.modelName, "=",
                  self.model.score(X_test, y_test))

    def predict(self, X_test):
        """Encode *X_test* and return the model's predicted labels."""
        X_test = self.encoder.predict(X_test)
        return self.model.predict(X_test)
# Benchmark training time and accuracy for each candidate activation.
for act_name, act_layer in ACTIVATIONS.items():
    net = Sequential()
    net.add(Dense(UNITS, input_dim=4))
    for depth in range(LAYERS):
        # The input Dense above already supplies the first layer's
        # weights, so only add extra Dense layers from depth 1 on.
        if depth != 0:
            net.add(Dense(UNITS))
        net.add(act_layer)
    net.add(Dense(3))
    net.add(Activation('softmax'))
    net.compile(optimizer='adam',
                loss='categorical_crossentropy',
                metrics=['accuracy'])

    # Time a single full training run.
    elapsed = timeit.timeit(
        lambda: net.fit(X_train, y_train_oh, epochs=EPOCHS, verbose=0),
        number=1)
    acc = net.evaluate(X_test, y_test_oh, verbose=0)[1]
    results['Neural Network ({})'.format(act_name)] = {
        'training_time': elapsed,
        'accuracy': acc
    }

# ----- Results -----
for algo in sorted(results):
    print('{}\n Accuracy:{:>8.2%}\n Time:{:>11.3}s'.format(
        algo,
        float(results[algo]['accuracy']),
        results[algo]['training_time']))