Exemplo n.º 1
0
class CumlSVMFitter(FitterBase):
    """Train, tune and cross-validate an ``SVC`` classifier on data frames.

    The estimator class ``SVC`` and the ``cudf`` frame library come from the
    enclosing module (presumably cuML / cuDF, judging by the class name --
    confirm against the file's imports). Hyper-parameter search is delegated
    to ``fmin`` with ``tpe.suggest``; the search space is ``asdict(self.opt)``.

    Attributes set here:
        opt: search-space description, ``SVMOpt()`` unless one is supplied.
        clf: the most recently fitted estimator, ``None`` before any training.
    """

    def __init__(self,
                 label='label',
                 metric='error',
                 opt: SVMOpt = None,
                 max_eval=100):
        """Store the configuration and fall back to a default ``SVMOpt``.

        Args:
            label: name of the target column in every data frame passed in.
            metric: metric name; the value ``'auc'`` disables the integer
                cast applied to predictions before the loss is computed.
            opt: search-space object; a fresh ``SVMOpt()`` is used if ``None``.
            max_eval: forwarded to ``fmin`` as ``max_evals``.
        """
        super().__init__(label, metric, max_eval)
        self.opt = opt if opt is not None else SVMOpt()
        self.clf = None

    def _eval_loss(self, eval_df):
        """Return the loss of the current ``self.clf`` on ``eval_df``.

        Predictions are cast to ``int`` unless the metric is ``'auc'``.
        Shared by ``search``, ``search_k_fold`` and ``train_k_fold`` so the
        three code paths cannot drift apart.
        """
        y_pred = self.clf.predict(eval_df.drop(columns=[self.label]))
        if self.metric != 'auc':
            y_pred = y_pred.astype(int)
        return self.get_loss(eval_df[self.label], y_pred)

    def train(self, train_df, eval_df, params=None):
        """Fit a new ``SVC`` on ``train_df`` and return its loss on ``eval_df``.

        Args:
            train_df: training frame including the label column.
            eval_df: evaluation frame including the label column.
            params: keyword arguments for ``SVC``; when ``None`` the stored
                ``self.opt_params`` are used instead.

        Returns:
            Whatever ``self.get_loss`` returns for the evaluation predictions.
        """
        train_df = cudf.DataFrame(train_df)
        eval_df = cudf.DataFrame(eval_df)
        x_train = train_df.drop(columns=[self.label])
        y_train = train_df[self.label]
        x_eval = eval_df.drop(columns=[self.label])
        y_eval = eval_df[self.label]

        # Deep-copy so the estimator never shares state with the caller's dict.
        use_params = deepcopy(self.opt_params if params is None else params)
        self.clf = SVC(**use_params)
        self.clf.fit(X=x_train, y=y_train)
        preds = self.clf.predict(X=x_eval)
        return self.get_loss(y_pred=preds, y=y_eval)

    def search(self, train_df, eval_df):
        """Tune hyper-parameters on a single train/eval split.

        The result of ``fmin`` is stored in ``self.opt_params``.
        NOTE(review): ``fmin`` presumably returns the best point in its own
        encoding (e.g. indices for choice-type dimensions) -- confirm that it
        can be passed straight to ``SVC`` by ``train``.
        """
        self.opt_params = dict()

        def train_impl(params):
            # train() refits self.clf; the loss is then recomputed with the
            # metric-dependent integer cast applied to the predictions.
            self.train(train_df, eval_df, params)
            return self._eval_loss(eval_df)

        self.opt_params = fmin(train_impl,
                               asdict(self.opt),
                               algo=tpe.suggest,
                               max_evals=self.max_eval)

    def search_k_fold(self, k_fold, data):
        """Tune hyper-parameters by minimising the mean loss over folds.

        Args:
            k_fold: splitter exposing ``split(data)`` that yields
                ``(train_id, eval_id)`` positional index pairs.
            data: frame including the label column; sliced with ``iloc``.
        """
        self.opt_params = dict()

        def train_impl_nfold(params):
            fold_losses = []
            for train_id, eval_id in k_fold.split(data):
                train_df = data.iloc[train_id, :]
                eval_df = data.iloc[eval_id, :]
                self.train(train_df, eval_df, params)
                fold_losses.append(self._eval_loss(eval_df))
            return np.mean(fold_losses)

        self.opt_params = fmin(train_impl_nfold,
                               asdict(self.opt),
                               algo=tpe.suggest,
                               max_evals=self.max_eval)

    def train_k_fold(self,
                     k_fold,
                     train_data,
                     test_data,
                     params=None,
                     drop_test_y=True):
        """Cross-validated training with out-of-fold and averaged test scores.

        Args:
            k_fold: splitter exposing ``split(train_data)`` and ``n_splits``.
            train_data: training frame including the label column.
            test_data: test frame; its label column is dropped first when
                ``drop_test_y`` is true.
            params: keyword arguments for ``SVC`` (see ``train``).
            drop_test_y: whether ``test_data`` still contains the label column.

        Returns:
            ``(train_pred, test_pred, acc_result)``: the out-of-fold
            second-column ``predict_proba`` values for the training rows, the
            test-set values averaged over ``k_fold.n_splits``, and the list
            of per-fold losses.

        NOTE(review): ``predict_proba`` presumably requires the estimator to
        be configured for probability output -- confirm ``params`` enables it.
        """
        acc_result = []
        # Zero-initialised on purpose: test_pred is accumulated with ``+=``,
        # so starting from np.empty() would add uninitialised memory into the
        # average; train_pred rows never placed in an eval fold stay 0.
        train_pred = cudf.Series(np.zeros(train_data.shape[0]))
        test_pred = cudf.Series(np.zeros(test_data.shape[0]))
        if drop_test_y:
            dtest = test_data.drop(columns=self.label)
        else:
            dtest = test_data
        for train_id, eval_id in k_fold.split(train_data):
            train_df = train_data.iloc[train_id, :]
            eval_df = train_data.iloc[eval_id, :]
            self.train(train_df, eval_df, params)
            # Out-of-fold score for the rows held out in this fold.
            train_pred[eval_id] = self.clf.predict_proba(
                eval_df.drop(columns=self.label)).iloc[:, 1].values
            acc_result.append(self._eval_loss(eval_df))
            test_pred += self.clf.predict_proba(dtest).iloc[:, 1]
        test_pred /= k_fold.n_splits
        return train_pred, test_pred, acc_result
Exemplo n.º 2
0
    else:
        C = 10
        gamma = 0.01
    clf = SVC(probability=True, C=C, gamma=gamma)
else:
    clf = LogisticRegression()  #normal case

# Fit whichever classifier was selected above on the training split.
clf.fit(X_train, y_train)

# Save the fitted classifier. The path is built from the run configuration;
# note that the 'LR_' prefix is used regardless of which classifier was chosen.
filename = './data/detectors/LR_' + attack_method + '_' + detector + '_' + mode + '_' + net + '.sav'
# A context manager guarantees the handle is flushed and closed even if
# pickling raises, instead of leaking the file object returned by open().
with open(filename, 'wb') as model_file:
    pickle.dump(clf, model_file)

print('Evaluating classifier...')
# Hard class predictions and the second column of predict_proba's output
# for every test sample.
prediction = clf.predict(X_test)
prediction_pr = clf.predict_proba(X_test)[:, 1]

# Tally predictions against the ground truth in y_test.
#   benign_guesses / ad_guesses: samples predicted as class 0 / as anything else.
#   benign_rate / ad_rate: how many of those also have y_test == 0 / y_test == 1.
# NOTE(review): despite the "_rate" names these hold raw counts at this point;
# presumably they are normalised further down -- confirm.
# NOTE(review): class 0 is presumably "benign" and class 1 "adversarial",
# judging by the variable names -- confirm against how y_test is built.
benign_rate = 0
benign_guesses = 0
ad_guesses = 0
ad_rate = 0
for i in range(len(prediction)):
    if prediction[i] == 0:
        # Predicted class 0; count it, and count it as correct if the label agrees.
        benign_guesses += 1
        if y_test[i] == 0:
            benign_rate += 1
    else:
        # Any non-zero prediction is counted as an adversarial guess.
        ad_guesses += 1
        if y_test[i] == 1:
            ad_rate += 1