class Classification(Supervised):
    """Train and score four baseline classifiers on the same dataset.

    Acronyms
    ----------
    LR : Logistic Regression
    DTC : Decision Tree Classifier
    RFC : Random Forest Classifier
    GNB : Gaussian Naive Bayes
    """

    def __init__(self, X, y, split=True, split_ratio=0.2):
        """Forward the data and split settings to ``Supervised``.

        Parameters
        ----------
        X, y : features and targets, passed through unchanged.
        split : whether the base class should hold out a test split.
        split_ratio : hold-out fraction, passed through unchanged.
        """
        Supervised.__init__(self, X, y, split, split_ratio)
        # Fitted estimators; each stays None until fit() is called.
        self.LR = None
        self.DTC = None
        self.RFC = None
        self.GNB = None

    def fit(self):
        """Fit all four classifiers on the training data.

        Returns
        -------
        None
        """
        # NOTE(review): assumes Supervised stores the splits as
        # self.X_train / self.y_train / self.X_test / self.y_test (only
        # self.X_test was referenced explicitly before) -- confirm.
        X_train, y_train = self.X_train, self.y_train
        self.LR = LogisticRegression(random_state=0).fit(X_train, y_train)
        self.DTC = DecisionTreeClassifier().fit(X_train, y_train)
        self.RFC = RandomForestClassifier(
            max_depth=None, random_state=0).fit(X_train, y_train)
        self.GNB = GaussianNB().fit(X_train, y_train)

    def evaluate(self):
        """Score every fitted classifier on the held-out test data.

        Uses each estimator's ``score`` method (mean accuracy for
        scikit-learn classifiers). ``fit`` must have been called first.

        Returns
        -------
        dict or None
            Mapping of acronym ('LR', 'DTC', 'RFC', 'GNB') to its test
            score, or None when there is no test split.
        """
        # Identity check: `!= None` on an array compares element-wise and
        # cannot be used as an `if` condition.
        if self.X_test is None:
            return None

        models = {
            'LR': self.LR,
            'DTC': self.DTC,
            'RFC': self.RFC,
            'GNB': self.GNB,
        }
        return {
            name: model.score(self.X_test, self.y_test)
            for name, model in models.items()
        }
# Small fully connected network for binary classification on 16 input features.
# NOTE: the hidden layers use no activation, so together they form a linear
# map; this is kept as in the original.
model = Sequential()
model.add(Dense(8, input_dim=16))
model.add(Dense(4))
# 'binary_crossentropy' expects probabilities in [0, 1] by default, so the
# output unit needs a sigmoid rather than an unbounded linear activation.
model.add(Dense(1, activation='sigmoid'))
# compile the keras model
model.compile(loss='binary_crossentropy', optimizer='adam',
              metrics=['accuracy'])
# fit the keras model on the dataset
model.fit(X, y, epochs=100, batch_size=32)
# evaluate the keras model
# NOTE: this scores the same data the model was trained on, so the number
# printed below is training accuracy, not a generalisation estimate.
_, accuracy = model.evaluate(X, y)
print('Accuracy: %.2f' % (accuracy * 100), '%')

# Single Hidden Layer ANN with Holdout
from sklearn.model_selection import train_test_split
# NOTE(review): wildcard import kept because later code may rely on the names
# it brings in; prefer explicit imports once usages are known.
from sklearn.metrics import *
from sklearn.neural_network import MLPClassifier
from time import time

# Start of the timed section for the hold-out experiment.
test_start = time()
# 80/20 hold-out split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1)
def _cross_validate_cnn(features, labels, input_dim, param):
    """Return the mean stratified 5-fold score of a CNN built from *param*."""
    kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=50)
    scores = []
    for train_idx, test_idx in kfold.split(features, labels):
        # Build a fresh model for every fold. Reusing one model would carry
        # weights trained on earlier folds (which contain this fold's test
        # samples) into later folds and inflate the score.
        clf = create_CNN(input_dim, param)
        clf.fit(features[train_idx], labels[train_idx],
                epochs=10, batch_size=32, verbose=0)
        cvscore = clf.evaluate(features[test_idx], labels[test_idx],
                               verbose=0)
        # presumably index 1 is the accuracy metric -- verify in create_CNN
        scores.append(cvscore[1])
    return np.array(scores).mean()


def run_model(model_name, review, X, y, CNN_param_list=None):
    """Grid over text-feature settings and print 5-fold accuracy for a model.

    For every combination of n-gram size (1-4), feature type ('TF' or
    'TFIDF') and vocabulary cap (1000, 5000, 10000), the text features are
    built from *review*, concatenated column-wise to *X*, and the selected
    model is cross-validated. Results are printed, not returned.

    Parameters
    ----------
    model_name : one of 'SVM', 'DT', 'Xgboost', 'RandomForest', 'LR', 'CNN'.
        Any other value builds the features and prints the headers but
        evaluates nothing.
    review : text input handed to the module-level ``TF`` / ``TFIDF``
        feature builders.
    X : 2-D array of extra features, concatenated with the text features.
    y : target labels.
    CNN_param_list : iterable of parameter sets passed to ``create_CNN``;
        required when ``model_name == 'CNN'``.

    Returns
    -------
    None
    """
    starttime = datetime.datetime.now()
    # The feature builders are looked up by name ('TF' / 'TFIDF') in the
    # module namespace.
    names = globals()
    print(model_name)

    # Lazy factories: only the selected model's class is ever referenced,
    # and each configuration gets a fresh, unfitted estimator.
    sklearn_factories = {
        'SVM': lambda: svm.SVC(kernel='linear', C=1),
        'DT': lambda: DecisionTreeClassifier(random_state=0, max_depth=2),
        'Xgboost': lambda: xgb.XGBClassifier(),
        'RandomForest': lambda: RandomForestClassifier(
            n_estimators=1000, criterion='entropy', random_state=42),
        'LR': lambda: LogisticRegression(random_state=0),
    }

    if model_name == 'CNN':
        # Column vector sized from the data itself, not a hard-coded row
        # count, so no labels are silently truncated or zero-padded.
        y = np.asarray(y).reshape(-1, 1)

    for ngram in [1, 2, 3, 4]:
        for feature in ['TF', 'TFIDF']:
            for max_feature in [1000, 5000, 10000]:
                print('the accuracy for ', ngram, 'gram', feature,
                      'with max feature number of', max_feature, 'is')
                tf_feature, row, col = names[feature](
                    review, ngram, max_feature)
                train = np.concatenate((X, tf_feature), axis=1)

                if model_name in sklearn_factories:
                    clf = sklearn_factories[model_name]()
                    scores = cross_val_score(clf, train, y, cv=5,
                                             scoring='accuracy')
                    print(scores.mean().round(4))
                elif model_name == 'CNN':
                    for param in CNN_param_list:
                        print('for CNN with param ', param)
                        # presumably col text features plus 2 columns from
                        # X -- TODO confirm against create_CNN / callers
                        mean_score = _cross_validate_cnn(
                            train, y, col + 2, param)
                        print(mean_score.round(4))

    endtime = datetime.datetime.now()
    # total_seconds() includes whole days, which `.seconds` would drop.
    print('the running time is', int((endtime - starttime).total_seconds()))
# Output layer: a single sigmoid unit with uniformly initialised weights.
classifier.add(
    Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))
classifier.compile(loss='binary_crossentropy', optimizer='adam',
                   metrics=['accuracy'])

# Fit the network to the training set.
classifier.fit(X_train, y_train, epochs=100, batch_size=32)

# Predict on the test set and threshold the outputs at 0.5 to get
# boolean class labels.
y_pred = classifier.predict(X_test) > 0.5

# Loss and metric values on the test set (bare name kept for notebook display).
score = classifier.evaluate(X_test, y_test)
score

# Build the confusion matrix.
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
cm

# Show the per-class report for the trained model.
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

# Confusion matrix
distortion_noise=np.sum(np.abs(f_evaluate_origin[i,:]-f_evaluate_noise[i,:])) p_value=0.0 if np.abs(f_evaluate_origin_score[i]-0.5)<=np.abs(f_evaluate_defense_score[i]-0.5): p_value=0.0 else: p_value=min(epsilon_value/distortion_noise,1.0) if predict_result_origin[i]==label_test[i]: inference_accuracy+=1.0-p_value if predict_result_defense[i]==label_test[i]: inference_accuracy+=p_value inference_accuracy_list.append(inference_accuracy/(float(f_evaluate_origin.shape[0]))) print("Budget list: {}".format(epsilon_value_list)) print("inference accuracy list: {}".format(inference_accuracy_list)) logger.debug("Budget list: {}".format(epsilon_value_list)) logger.debug("inference accuracy list: {}".format(inference_accuracy_list)) else: scores_test_defense = model.evaluate(b_test, label_test, verbose=0) logger.debug("[Atteck Model]Test loss defense:{}".format(scores_test_defense[0])) logger.debug("[Atteck Model]Test accuracy defense:{}".format(scores_test_defense[1])) count=0 for i in np.arange(f_evaluate_origin.shape[0]): distortion_noise=np.sum(np.abs(f_evaluate_origin[i,:]-f_evaluate_noise[i,:])) if predict_result_origin[i]==label_test[i]: count+=1 #logger.debug("inference accuracy: {}".format((count+0.0)/f_evaluate_origin.shape[0]))