Example #1
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB


class Classification(Supervised):
    def __init__(self, X, y, split=True, split_ratio=0.2):
        Supervised.__init__(self, X, y, split, split_ratio)
        self.LR = None
        self.DTC = None
        self.RFC = None
        self.GNB = None

    def fit(self):
        """

        Acronyms
        ----------
        LR : Logistic Regression
        DTC : Decision Tree Classifier
        RFC : Random Forest Classifier
        GNB : Gaussian Naive Bayes

        Returns
        -------
        None

        """
        self.LR = LogisticRegression(random_state=0).fit(self.X_train, self.y_train)
        self.DTC = DecisionTreeClassifier().fit(self.X_train, self.y_train)
        self.RFC = RandomForestClassifier(max_depth=None, random_state=0).fit(
            self.X_train, self.y_train)
        self.GNB = GaussianNB().fit(self.X_train, self.y_train)

    def evaluate(self):
        if self.X_test is not None:
            # scikit-learn estimators expose score(), not evaluate()
            lr_eval = self.LR.score(self.X_test, self.y_test)
            dtc_eval = self.DTC.score(self.X_test, self.y_test)
            rfc_eval = self.RFC.score(self.X_test, self.y_test)
            gnb_eval = self.GNB.score(self.X_test, self.y_test)
            return lr_eval, dtc_eval, rfc_eval, gnb_eval
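
# A minimal usage sketch (not from the source): assumes the Supervised base
# class performs the train/test split and exposes self.X_train, self.y_train,
# self.X_test and self.y_test, as the method bodies above imply.
clf = Classification(X, y, split=True, split_ratio=0.2)
clf.fit()
lr_acc, dtc_acc, rfc_acc, gnb_acc = clf.evaluate()
print('LR=%.3f DTC=%.3f RFC=%.3f GNB=%.3f' % (lr_acc, dtc_acc, rfc_acc, gnb_acc))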

Example #2
from keras.models import Sequential
from keras.layers import Dense

model = Sequential()
model.add(Dense(8, input_dim=16, activation='relu'))
model.add(Dense(4, activation='relu'))
# a sigmoid output is needed so binary_crossentropy receives probabilities
model.add(Dense(1, activation='sigmoid'))

# compile the keras model
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# fit the keras model on the dataset
model.fit(X, y, epochs=100, batch_size=32)

# evaluate the keras model (on the training data, so this is an optimistic estimate)
_, accuracy = model.evaluate(X, y)
print('Accuracy: %.2f%%' % (accuracy * 100))

# Single Hidden Layer ANN with Holdout

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier
from time import time

test_start = time()
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=1)
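
# The snippet is truncated here. A minimal completion sketch of the holdout
# run it sets up; hidden_layer_sizes and max_iter are assumptions, since the
# original does not show them.
mlp = MLPClassifier(hidden_layer_sizes=(100,), max_iter=500, random_state=1)
mlp.fit(X_train, y_train)
y_pred = mlp.predict(X_test)
print('Holdout accuracy: %.4f' % accuracy_score(y_test, y_pred))
print('Elapsed: %.2f s' % (time() - test_start))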
Example #3
import datetime

import numpy as np
import xgboost as xgb
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.tree import DecisionTreeClassifier


def run_model(model_name, review, X, y, CNN_param_list=None):
    starttime = datetime.datetime.now()
    # resolve the TF / TFIDF feature-extraction helpers by name
    names = globals()
    print(model_name)
    for ngram in [1, 2, 3, 4]:
        for feature in ['TF', 'TFIDF']:
            for max_feature in [1000, 5000, 10000]:
                print('the accuracy for ', ngram, 'gram', feature,
                      'with max feature number of', max_feature, 'is')
                tf_feature, row, col = names[feature](review, ngram,
                                                      max_feature)
                train = np.concatenate((X, tf_feature), axis=1)
                if model_name == 'SVM':
                    clf = svm.SVC(kernel='linear', C=1)
                    scores = cross_val_score(clf,
                                             train,
                                             y,
                                             cv=5,
                                             scoring='accuracy')
                    print(scores.mean().round(4))

                elif model_name == 'DT':
                    clf = DecisionTreeClassifier(random_state=0, max_depth=2)
                    scores = cross_val_score(clf,
                                             train,
                                             y,
                                             cv=5,
                                             scoring='accuracy')
                    print(scores.mean().round(4))

                elif model_name == 'Xgboost':
                    clf = xgb.XGBClassifier()
                    scores = cross_val_score(clf,
                                             train,
                                             y,
                                             cv=5,
                                             scoring='accuracy')
                    print(scores.mean().round(4))

                elif model_name == 'RandomForest':
                    clf = RandomForestClassifier(n_estimators=1000,
                                                 criterion='entropy',
                                                 random_state=42)
                    scores = cross_val_score(clf,
                                             train,
                                             y,
                                             cv=5,
                                             scoring='accuracy')
                    print(scores.mean().round(4))

                elif model_name == 'LR':
                    clf = LogisticRegression(random_state=0)
                    scores = cross_val_score(clf,
                                             train,
                                             y,
                                             cv=5,
                                             scoring='accuracy')
                    print(scores.mean().round(4))

                elif model_name == 'CNN':
                    y = np.array(y)
                    y.resize(1604, 1)
                    for param in CNN_param_list:
                        print('for CNN with param ', param)
                        kfold = StratifiedKFold(n_splits=5,
                                                shuffle=True,
                                                random_state=50)
                        scores = []
                        for train_idx, test_idx in kfold.split(train, y):
                            # rebuild the model each fold so weights trained on
                            # one fold do not leak into the next
                            clf = create_CNN(col + 2, param)
                            clf.fit(train[train_idx],
                                    y[train_idx],
                                    epochs=10,
                                    batch_size=32,
                                    verbose=0)
                            cvscore = clf.evaluate(train[test_idx],
                                                   y[test_idx],
                                                   verbose=0)
                            scores.append(cvscore[1])

                        scores = np.array(scores)

                        print(scores.mean().round(4))

    endtime = datetime.datetime.now()
    print('the running time is', (endtime - starttime).seconds)
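
# Hypothetical invocation (not shown in the source): 'review' holds the raw
# text column, and TF, TFIDF and create_CNN are helpers resolved via globals()
# above. The CNN parameter values below are placeholders.
run_model('SVM', review, X, y)
run_model('CNN', review, X, y, CNN_param_list=[32, 64])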
Example #4
# earlier layers of 'classifier' are omitted in this excerpt
classifier.add(
    Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))
classifier.compile(optimizer='adam',
                   loss='binary_crossentropy',
                   metrics=['accuracy'])

# Fit the network to the training set

classifier.fit(X_train, y_train, batch_size=32, epochs=100)

# Predict the test-set results

y_pred = classifier.predict(X_test)
y_pred = (y_pred > 0.5)

score = classifier.evaluate(X_test, y_test)
print(score)

# Build the confusion matrix

from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
print(cm)

# Classification report for the trained model

from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))
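
# For reference (not in the source): the headline numbers in the report can be
# recovered from the binary confusion matrix above, which sklearn lays out as
# [[tn, fp], [fn, tp]].
tn, fp, fn, tp = cm.ravel()
print('acc=%.3f precision=%.3f recall=%.3f' % (
    (tp + tn) / cm.sum(), tp / (tp + fp), tp / (tp + fn)))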

Example #5
            distortion_noise = np.sum(np.abs(f_evaluate_origin[i, :] - f_evaluate_noise[i, :]))
            if np.abs(f_evaluate_origin_score[i] - 0.5) <= np.abs(f_evaluate_defense_score[i] - 0.5):
                p_value = 0.0
            else:
                p_value = min(epsilon_value / distortion_noise, 1.0)

            if predict_result_origin[i] == label_test[i]:
                inference_accuracy += 1.0 - p_value
            if predict_result_defense[i] == label_test[i]:
                inference_accuracy += p_value
        inference_accuracy_list.append(inference_accuracy / float(f_evaluate_origin.shape[0]))

    print("Budget list: {}".format(epsilon_value_list))
    print("Inference accuracy list: {}".format(inference_accuracy_list))

    logger.debug("Budget list: {}".format(epsilon_value_list))
    logger.debug("Inference accuracy list: {}".format(inference_accuracy_list))
else:
    scores_test_defense = model.evaluate(b_test, label_test, verbose=0)
    logger.debug("[Attack Model] Test loss with defense: {}".format(scores_test_defense[0]))
    logger.debug("[Attack Model] Test accuracy with defense: {}".format(scores_test_defense[1]))
    count = 0
    for i in np.arange(f_evaluate_origin.shape[0]):
        distortion_noise = np.sum(np.abs(f_evaluate_origin[i, :] - f_evaluate_noise[i, :]))

        if predict_result_origin[i] == label_test[i]:
            count += 1
    # logger.debug("inference accuracy: {}".format((count + 0.0) / f_evaluate_origin.shape[0]))
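
# A self-contained sketch (variable names are illustrative, not the source's)
# of the probabilistic weighting above: a query is answered with the defended
# prediction with probability p = min(epsilon / distortion, 1) when the
# defense pushed the score closer to the 0.5 decision boundary, and with the
# original prediction otherwise; the attack's expected accuracy follows.
import numpy as np

def expected_inference_accuracy(origin_scores, defense_scores, origin_pred,
                                defense_pred, labels, distortions, epsilon):
    acc = 0.0
    for i in range(len(labels)):
        if np.abs(origin_scores[i] - 0.5) <= np.abs(defense_scores[i] - 0.5):
            p = 0.0  # defense did not move the score toward 0.5: keep original
        else:
            p = min(epsilon / distortions[i], 1.0)
        acc += (1.0 - p) * float(origin_pred[i] == labels[i])
        acc += p * float(defense_pred[i] == labels[i])
    return acc / len(labels)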