Пример #1
0
    def get_classifier(self, traindata, kf):

        x_tr, x_te, y_tr, y_te = fac.to_kfold(traindata, kf)
        acc_max, bestK, acc = 0, 0, [[] for a in range(kf)]

        for i in range(kf):

            # print('DOAO round', i, 'begin')
            # svm 00
            print('test00')
            clf_svm = SVC()
            clf_svm.fit(x_tr[i], y_tr[i].ravel())
            label_svm = clf_svm.predict(x_te[i])
            acc[i].append(fac.get_acc(y_te[i], label_svm)[0])

            # KNN 01
            print('test01')
            acc_k = []
            aux_k = [3, 5, 7]
            # for k in range(3, 12, 2):
            for k in aux_k:
                clf_knn = KNN_GPU(k=k)
                clf_knn.fit(x_tr[i], y_tr[i])
                label_knn = clf_knn.predict(x_te[i])
                acc_k.append(fac.get_acc(y_te[i], label_knn)[0])
            acc[i].append(max(acc_k))
            bestK = aux_k[acc_k.index(max(acc_k))]

            # LR 02
            print('test02')
            clf_lr = LogisticRegression()
            clf_lr.fit(x_tr[i], y_tr[i])
            label_LR = clf_lr.predict(x_te[i])
            acc[i].append(fac.get_acc(y_te[i], label_LR)[0])

            # XgBoost 03
            print('test03')
            clf_xgb = DecisionTreeClassifier()
            clf_xgb.fit(x_tr[i], y_tr[i])
            label_xgb = clf_xgb.predict(x_te[i])
            acc[i].append(fac.get_acc(y_te[i], label_xgb)[0])

            # RF 04
            print('test04')

            clf_rf = TGBMClassifier()
            clf_rf.fit(x_tr[i], y_tr[i])
            label_rf = clf_rf.predict(x_te[i])
            acc[i].append(fac.get_acc(y_te[i], label_rf)[0])

            print('DOAO round', i, 'end')

        acc = np.array(acc)
        acc_mean = acc.mean(axis=0)

        # fun_best = np.where(acc_mean == max(acc_mean))
        fun_best = np.argmax(acc_mean)

        return fun_best, bestK
Пример #2
0
    def get_classifier(self, train, kf):

        x_tr, x_te, y_tr, y_te = fac.to_kfold(train, kf)
        acc_max, bestK, acc = 0, 0, [[] for a in range(kf)]

        for i in range(kf):

            # print('DECOC round', i, 'begin')
            # svm 00
            clf_svm = SVC()
            clf_svm.fit(x_tr[i], y_tr[i].ravel())
            label_svm = clf_svm.predict(x_te[i])
            acc[i].append(fac.get_acc(y_te[i], label_svm)[0])

            # KNN 01
            acc_k = []
            aux_k = [3, 5, 7]
            # for k in range(3, 12, 2):
            for k in aux_k:
                clf_knn = KNN_GPU(k=k)
                clf_knn.fit(x_tr[i], y_tr[i])
                label_knn = clf_knn.predict(x_te[i])
                acc_k.append(fac.get_acc(y_te[i], label_knn)[0])
            acc[i].append(max(acc_k))
            bestK = aux_k[acc_k.index(max(acc_k))]

            # # LR 02
            # clf_lr = LR_GPU()
            # clf_lr.fit(x_tr[i], y_tr[i])
            # label_LR = clf_lr.predicted(x_te[i])
            # acc[i].append(fac.get_acc(y_te[i], label_LR)[0])

            # LR 02
            clf_lr = LogisticRegression()
            clf_lr.fit(x_tr[i], y_tr[i])
            label_LR = clf_lr.predict(x_te[i])
            acc[i].append(fac.get_acc(y_te[i], label_LR)[0])

            # CART 03
            clf_cart = DecisionTreeClassifier()
            clf_cart.fit(x_tr[i], y_tr[i])
            label_cart = clf_cart.predict(x_te[i])
            acc[i].append(fac.get_acc(y_te[i], label_cart)[0])

            # # RF 04
            clf_rf = TGBMClassifier()
            clf_rf.fit(x_tr[i], y_tr[i].ravel())
            label_rf = clf_rf.predict(x_te[i])
            acc[i].append(fac.get_acc(y_te[i], label_rf)[0])

            print('DECOC round', i, 'end')

        acc = np.array(acc)
        acc_mean = acc.mean(axis=0)
        # fun_best = np.where(acc_mean == max(acc_mean))
        fun_best = np.argmax(acc_mean)

        return fun_best, bestK
Пример #3
0
    def fun_predict(self, x_te, C, D, L):
        print('func_predict')

        num = len(D)
        cf = C[0]
        ck = C[1]

        allpre = np.zeros((len(x_te), num))
        for i in range(num):
            train = D[i]
            traindata = train[:, 0:-1]
            trainlabel = train[:, -1]

            if cf[i] == 0:
                # svm
                print('SVM predict')
                clf_svm = SVC()
                clf_svm.fit(traindata, trainlabel.ravel())
                label_svm = clf_svm.predict(x_te)
                allpre[:, i] = label_svm
            elif cf[i] == 1:
                # knn
                clf_knn = KNN_GPU(k=ck[i])
                clf_knn.fit(traindata, trainlabel)
                label_knn = clf_knn.predict(x_te)
                allpre[:, i] = label_knn
            elif cf[i] == 2:
                # LR
                print('LR predict')
                clf_lr = LogisticRegression()
                clf_lr.fit(traindata, trainlabel.ravel())
                label_LR = clf_lr.predict(x_te)
                allpre[:, i] = label_LR
            elif cf[i] == 3:
                # CART
                print('CART predict')
                clf_xgb = DecisionTreeClassifier()
                clf_xgb.fit(traindata, trainlabel)
                label_xgb = clf_xgb.predict(x_te)
                allpre[:, i] = label_xgb
            elif cf[i] == 4:
                # Rf
                print('RF predict')
                clf_rf = TGBMClassifier()
                clf_rf.fit(traindata, trainlabel.ravel())
                label_rf = clf_rf.predict(x_te)
                allpre[:, i] = label_rf
            else:
                print('error !!!! DOAO.fun_predict')

            label = L[i]
            for j in range(len(x_te)):
                allpre[j, i] = label[0] if allpre[j, i] == 0 else label[1]

        # print('predict end for')
        pre = mode(allpre, axis=1)[0]
        return pre
Пример #4
0
    def funcPreEDOVO(self, x_test, y_test, C, D):

        numC = np.asarray(C).shape[0]
        num_set = len(y_test)
        allpre = np.zeros([num_set, numC])

        for i in range(numC):

            train = D[i]
            traindata = np.array(train[:, 0:-1])
            trainlabel = np.array(train[:, -1], dtype='int64')
            if C[i, 0] == 0:
                print('test0')
                # svm
                clf_svm = SVC()
                clf_svm.fit(traindata, trainlabel.ravel())
                label_svm = clf_svm.predict(x_test)
                allpre[:, i] = label_svm
            elif C[i, 0] == 1:
                # print('test1')
                # knn
                clf_knn = KNN_GPU(k=C[i][1])
                # clf_knn = KNN_torch(k=C[i][1])
                clf_knn.fit(traindata, trainlabel)
                label_knn = clf_knn.predict(x_test)
                allpre[:, i] = label_knn.ravel()
            elif C[i, 0] == 2:
                print('test2')
                # LR
                clf_lr = LogisticRegression()
                clf_lr.fit(traindata, trainlabel)
                label_LR = clf_lr.predict(x_test)
                allpre[:, i] = label_LR
                # # LR
                # clf_lr = LR_GPU()
                # clf_lr.fit(traindata, trainlabel)
                # label_LR = clf_lr.predicted(x_test)
                # allpre[:, i] = label_LR
            elif C[i, 0] == 3:
                print('test3')
                # CART
                clf_cart = DecisionTreeClassifier()
                clf_cart.fit(traindata, trainlabel)
                label_cart = clf_cart.predict(x_test)
                allpre[:, i] = label_cart
            elif C[i, 0] == 4:
                print('test4')
                # RandomForest
                clf_ada = TGBMClassifier()
                clf_ada.fit(traindata, trainlabel.ravel())
                label_ada = clf_ada.predict(x_test)
                allpre[:, i] = label_ada

            else:
                print('error !!!! DECOC.funcPreEDOVO')

        return allpre
Пример #5
0
        def objective(hyperparams):
            hyperparams = self.hyperparams.copy()
            hyperparams['iterations'] = 300
            model = TGBMClassifier(**{**self.params, **hyperparams})
            model.fit(X_train, y_train)
            pred = model.predict(X_eval)

            if self.is_multi_label:
                score = roc_auc_score(y_eval, pred[:, 1])
            else:
                score = roc_auc_score(y_eval, pred)

            return {'loss': -score, 'status': STATUS_OK}
Пример #6
0
    def epoch_train(self, dataloader, run_num, is_multi_label=None, info=None):
        self.is_multi_label = is_multi_label
        X, y, train_idxs, cat = dataloader['X'], dataloader['y'], dataloader[
            'train_idxs'], dataloader['cat_cols']
        train_x, train_y = X.loc[train_idxs], y[train_idxs]

        if info['mode'] == 'bagging':
            self.hyperparams = info['tgb'].copy()
            self.hyperparams['random_seed'] = np.random.randint(0, 2020)
            run_num = self.explore_params_round

        if run_num == self.explore_params_round:
            print('tgb explore_params_round')
            X, y, val_idxs = dataloader['X'], dataloader['y'], dataloader[
                'val_idxs']
            val_x, val_y = X.loc[val_idxs], y[val_idxs]

            self.bayes_opt(train_x, val_x, train_y, val_y, cat)
            #self.early_stop_opt(train_x, val_x, train_y, val_y, cat)

            info['tgb'] = self.hyperparams.copy()

        if run_num == self.all_data_round:
            print('tgb all data round')
            all_train_idxs = dataloader['all_train_idxs']
            train_x = X.loc[all_train_idxs]
            train_y = y[all_train_idxs]

        if self.is_multi_label:
            for cls in range(self.num_class):
                cls_y = train_y[:, cls]
                self.models[cls] = TGBMClassifier(**{
                    **self.params,
                    **self.hyperparams
                })
                self.models[cls].fit(train_x, cls_y)
        else:
            self._model = TGBMClassifier(**{**self.params, **self.hyperparams})
            self._model.fit(train_x, ohe2cat(train_y))
Пример #7
0
 def create_classifer(self, index=0):
     return TGBMClassifier(n_trees=10)
Пример #8
0
class TGBModel(MetaModel):
    def __init__(self):
        super(TGBModel, self).__init__()
        self.max_run = 2
        self.all_data_round = 1
        self.explore_params_round = 0

        self.not_gain_threhlod = 3

        self.patience = 3

        self.is_init = False

        self.name = 'tgb'
        self.type = 'tree'

        self._model = None

        self.params = {
            'objective': 'multi:softprob',
        }

        self.hyperparams = {
            "learning_rate": 0.02,
            'num_class': None,
            'n_trees': 1000,
            'depth': 6,
            'column_sampling_rate': 0.8
        }

        self.is_multi_label = None

        self.num_class = None

        self.models = {}

    def init_model(self, num_class, **kwargs):
        self.is_init = True
        self.num_class = num_class

    @timeit
    def epoch_train(self, dataloader, run_num, is_multi_label=None, info=None):
        self.is_multi_label = is_multi_label
        X, y, train_idxs, cat = dataloader['X'], dataloader['y'], dataloader[
            'train_idxs'], dataloader['cat_cols']
        train_x, train_y = X.loc[train_idxs], y[train_idxs]

        if info['mode'] == 'bagging':
            self.hyperparams = info['tgb'].copy()
            self.hyperparams['random_seed'] = np.random.randint(0, 2020)
            run_num = self.explore_params_round

        if run_num == self.explore_params_round:
            print('tgb explore_params_round')
            X, y, val_idxs = dataloader['X'], dataloader['y'], dataloader[
                'val_idxs']
            val_x, val_y = X.loc[val_idxs], y[val_idxs]

            self.bayes_opt(train_x, val_x, train_y, val_y, cat)
            #self.early_stop_opt(train_x, val_x, train_y, val_y, cat)

            info['tgb'] = self.hyperparams.copy()

        if run_num == self.all_data_round:
            print('tgb all data round')
            all_train_idxs = dataloader['all_train_idxs']
            train_x = X.loc[all_train_idxs]
            train_y = y[all_train_idxs]

        if self.is_multi_label:
            for cls in range(self.num_class):
                cls_y = train_y[:, cls]
                self.models[cls] = TGBMClassifier(**{
                    **self.params,
                    **self.hyperparams
                })
                self.models[cls].fit(train_x, cls_y)
        else:
            self._model = TGBMClassifier(**{**self.params, **self.hyperparams})
            self._model.fit(train_x, ohe2cat(train_y))

    @timeit
    def epoch_valid(self, dataloader):
        X, y, val_idxs = dataloader['X'], dataloader['y'], dataloader[
            'val_idxs']
        val_x, val_y = X.loc[val_idxs], y[val_idxs]
        if not self.is_multi_label:
            preds = self._model.predict_proba(val_x)
        else:
            all_preds = []
            for cls in range(y.shape[1]):
                preds = self.models[cls].predict_proba(val_x)
                all_preds.append(preds[:, 1])
            preds = np.stack(all_preds, axis=1)
        valid_auc = roc_auc_score(val_y, preds)
        return valid_auc

    @timeit
    def predict(self, dataloader):
        X, test_idxs = dataloader['X'], dataloader['test_idxs']
        test_x = X.loc[test_idxs]
        if not self.is_multi_label:
            return self._model.predict_proba(test_x)
        else:
            all_preds = []
            for cls in range(self.num_class):
                preds = self.models[cls].predict_proba(test_x)
                all_preds.append(preds[:, 1])
            return np.stack(all_preds, axis=1)

    @timeit
    def bayes_opt(self, X_train, X_eval, y_train, y_eval, categories):
        if self.is_multi_label:
            y_train = y_train[:, 1]
            y_eval = y_eval[:, 1]
        else:
            y_train = ohe2cat(y_train)

        space = {
            "learning_rate":
            hp.loguniform("learning_rate", np.log(0.01), np.log(0.1)),
            "depth":
            hp.choice("depth", [4, 6, 8, 10, 12]),
            "lambda_tgbm":
            hp.uniform('l2_leaf_reg', 0.1, 2),
        }

        def objective(hyperparams):
            hyperparams = self.hyperparams.copy()
            hyperparams['iterations'] = 300
            model = TGBMClassifier(**{**self.params, **hyperparams})
            model.fit(X_train, y_train)
            pred = model.predict(X_eval)

            if self.is_multi_label:
                score = roc_auc_score(y_eval, pred[:, 1])
            else:
                score = roc_auc_score(y_eval, pred)

            return {'loss': -score, 'status': STATUS_OK}

        trials = Trials()
        best = hyperopt.fmin(fn=objective,
                             space=space,
                             trials=trials,
                             algo=tpe.suggest,
                             max_evals=10,
                             verbose=1,
                             rstate=np.random.RandomState(1))

        self.hyperparams.update(space_eval(space, best))
        log("auc = {}, hyperparams: {}".format(
            -trials.best_trial['result']['loss'], self.hyperparams))
Пример #9
0
import sys
sys.path.append("../")

from thundergbm import TGBMClassifier
from sklearn.datasets import load_digits
from sklearn.metrics import accuracy_score

if __name__ == '__main__':
    x, y = load_digits(return_X_y=True)
    clf = TGBMClassifier()
    clf.fit(x, y)
    y_pred = clf.predict(x)
    accuracy = accuracy_score(y, y_pred)
    print(accuracy)