示例#1
0
def runKNN(data, target):
    """Cross-validate a k-NN classifier on raw, min-max scaled and standardized data.

    For each fold count the same KNeighborsClassifier is evaluated on the
    three preprocessing variants and scores are printed via the project's
    print_scores helper.

    Fixes: the banner said "NB" in a k-NN routine; cv was hard-coded to 10
    instead of using the fold value; the depths loop was dead code (depth
    was never used).
    """
    folds = [10]
    print("------------ KNN ------------")
    mms = MinMaxScaler()
    stdsc = StandardScaler()
    datamms = mms.fit_transform(data)        # features rescaled to [0, 1]
    datastdsc = stdsc.fit_transform(data)    # zero-mean / unit-variance features
    for fold in folds:
        print('fold = %d ' % fold)
        knn = KNeighborsClassifier()
        # Same estimator, three preprocessing variants, in the original order.
        for dataset in (data, datamms, datastdsc):
            testpredict, testtarget = cross_val_pred2ict(knn,
                                                         dataset,
                                                         target,
                                                         cv=fold,
                                                         n_jobs=-1)
            print_scores(testpredict, testtarget)
示例#2
0
def runNB(data, target):
    """Cross-validate Gaussian Naive Bayes on raw, min-max scaled and standardized data.

    Scores for each preprocessing variant are printed via the project's
    print_scores helper.

    Fixes: cv was hard-coded to 10 instead of the fold value; the depths
    loop was dead code (depth was never used); the CV call was triplicated.
    """
    folds = [10]
    print("------------ NB ------------")
    mms = MinMaxScaler()
    stdsc = StandardScaler()
    datamms = mms.fit_transform(data)        # features rescaled to [0, 1]
    datastdsc = stdsc.fit_transform(data)    # zero-mean / unit-variance features
    for fold in folds:
        print('fold = %d ' % fold)
        for dataset in (data, datamms, datastdsc):
            # A fresh estimator per run, mirroring the original behavior.
            nvb = GaussianNB()
            testpredict, testtarget = cross_val_pred2ict(nvb,
                                                         dataset,
                                                         target,
                                                         cv=fold,
                                                         n_jobs=-1)
            print_scores(testpredict, testtarget)
示例#3
0
def runvoting(data, target):
    """Cross-validate three base classifiers plus hard- and soft-voting ensembles.

    Bug fixed: the soft-voting ensemble (eclf2) was constructed but never
    evaluated -- the zip() paired only four classifiers with five labels, so
    'ESEMBLE SOFT' was silently dropped. eclf2 is now included.
    """
    folds = [10]

    for fold in folds:
        print('fold = %d ' % fold)

        clf1 = KNeighborsClassifier(n_neighbors=5)
        clf2 = tree.DecisionTreeClassifier(random_state=1)
        clf3 = GaussianNB()
        skf = StratifiedKFold(n_splits=fold, random_state=2)
        eclf1 = VotingClassifier(estimators=[('Ada', clf1),
                                             ('RandomForest', clf2),
                                             ('SVM', clf3)],
                                 voting='hard')
        # Soft voting averages predict_proba; all three bases support it.
        eclf2 = VotingClassifier(estimators=[('Ada', clf1),
                                             ('RandomForest', clf2),
                                             ('SVM', clf3)],
                                 voting='soft')
        # NOTE(review): the printed labels do not match the estimators
        # (clf1 is k-NN, not AdaBoost, etc.); kept as-is since they are
        # runtime output strings.
        for clf, label in zip(
            [clf1, clf2, clf3, eclf1, eclf2],
            ['Ada', 'RandomForest', 'SVM RBF', 'ESEMBLE HARD', 'ESEMBLE SOFT'
             ]):
            testpredict, testtarget = cross_val_pred2ict(clf,
                                                         data,
                                                         target,
                                                         cv=skf.get_n_splits(
                                                             data, target),
                                                         n_jobs=-1)
            print("--------------------------")
            print(label)
            print_scores(testpredict, testtarget)
        eclf1.fit(data, target)
示例#4
0
def _print_matrix_metrics(matrices):
    """Print accuracy, precision, recall and three F1 variants for a list of per-fold confusion matrices."""
    prec = precision(matrices)
    sens = sensitivity(matrices)
    print("Accuracy: %r" % str(accuracy(matrices)))
    print("Precision: %r" % str(prec))
    print("Recall: %r" % str(sens))
    print("f1")
    print(f1tpfp(matrices))
    print(f1prre(prec, sens))
    print(f1avg(matrices))


def runadatree(data, target):
    """Cross-validate AdaBoost over decision trees for every (fold, depth, n_estimators) combination.

    Metrics are printed twice per combination: once for the confusion
    matrices as computed, and once with the class roles swapped (the second
    class treated as positive).

    Fixes: removed an unused DecisionTreeClassifier that was built and
    discarded each iteration; the duplicated metric-printing code is now a
    shared helper (_print_matrix_metrics).
    """
    folds = [10]
    depths = [10, 100, 1000]
    estimators = [100, 1000]
    for fold in folds:

        print('fold = %d ' % fold)

        for depth in depths:
            for estimator in estimators:
                print('depth = %d ' % depth)
                print('estimators = %d ' % estimator)
                skf = StratifiedKFold(n_splits=fold, random_state=5)
                adaboosting = AdaBoostClassifier(
                    tree.DecisionTreeClassifier(max_depth=depth),
                    n_estimators=estimator)
                testpredict, testtarget = cross_val_pred2ict(adaboosting,
                                                             data,
                                                             target,
                                                             cv=skf,
                                                             n_jobs=-1)

                if len(testpredict) != len(testtarget):
                    raise ValueError('length score and target are different!')

                # One confusion matrix per CV fold.
                matrices1 = [confusion_matrix(tar, pr)
                             for pr, tar in zip(testpredict, testtarget)]
                _print_matrix_metrics(matrices1)

                # Swap rows and columns so the other class acts as positive.
                matrices2 = [np.array([[m[1, 1], m[1, 0]],
                                       [m[0, 1], m[0, 0]]])
                             for m in matrices1]
                _print_matrix_metrics(matrices2)
示例#5
0
def runstacking(data, target):
    """Grid-search a stacking ensemble, then cross-validate its base classifiers.

    First a StackingClassifier (k-NN + decision tree + Gaussian NB, logistic
    regression meta-learner) is tuned with GridSearchCV and the CV results
    printed; then the base classifiers and a fresh stacking ensemble are each
    scored with the project's cross_val_pred2ict / print_scores helpers.
    """
    folds = [10]

    for fold in folds:
        print('fold = %d ' % fold)

        clf1 = KNeighborsClassifier(n_neighbors=5)
        clf2 = tree.DecisionTreeClassifier(random_state=1)
        clf3 = GaussianNB()
        lr = LogisticRegression(C=10.0)
        sclf = StackingClassifier(classifiers=[clf1, clf2, clf3],
                                  meta_classifier=lr)

        # Parameter grid addresses the stacked members by their lowercase
        # class names (mlxtend StackingClassifier convention).
        params = {
            'kneighborsclassifier__n_neighbors': [1, 5],
            'decisiontreeclassifier__max_depth': [1, 10, 50],
            'meta-logisticregression__C': [0.1, 10.0]
        }

        grid = GridSearchCV(estimator=sclf,
                            param_grid=params,
                            cv=10,
                            refit=True)
        grid.fit(data, target)

        cv_keys = ('mean_test_score', 'std_test_score', 'params')

        # One line per grid point: mean score, half the std dev, parameters.
        for r, _ in enumerate(grid.cv_results_['mean_test_score']):
            print("%0.3f +/- %0.2f %r" %
                  (grid.cv_results_[cv_keys[0]][r],
                   grid.cv_results_[cv_keys[1]][r] / 2.0,
                   grid.cv_results_[cv_keys[2]][r]))

        print('Best parameters: %s' % grid.best_params_)
        print('Accuracy: %.2f' % grid.best_score_)

        skf = StratifiedKFold(n_splits=fold, random_state=2)
        eclf1 = StackingClassifier(classifiers=[clf1, clf2, clf3],
                                   meta_classifier=lr)
        # NOTE(review): five labels but only four classifiers -- zip drops
        # 'ESEMBLE SOFT'; the remaining labels also do not match the
        # estimators (clf1 is k-NN, not AdaBoost). Confirm intended pairing.
        for clf, label in zip(
            [clf1, clf2, clf3, eclf1],
            ['Ada', 'RandomForest', 'SVM RBF', 'ESEMBLE HARD', 'ESEMBLE SOFT'
             ]):
            testpredict, testtarget = cross_val_pred2ict(clf,
                                                         data,
                                                         target,
                                                         cv=skf.get_n_splits(
                                                             data, target),
                                                         n_jobs=-1)
            print("--------------------------")
            print(label)
            print_scores(testpredict, testtarget)
示例#6
0
File: svm.py  Project: kob22/pracamgr
def runsvcn(data, target):
    """Cross-validate an SVC on raw, min-max scaled and standardized data.

    For each fold count and each kernel the classifier is evaluated on the
    three preprocessing variants and scores printed via print_scores.

    Fixes: removed a no-op svc.set_params() call, an unused StratifiedKFold,
    and two never-filled matrix lists; cv now honours the fold value instead
    of a hard-coded 10.
    """
    folds = [10]
    kernels = ['rbf']
    print("------------ SVM  ------------")
    mms = MinMaxScaler()
    stdsc = StandardScaler()
    datamms = mms.fit_transform(data)        # features rescaled to [0, 1]
    datastdsc = stdsc.fit_transform(data)    # zero-mean / unit-variance features
    for fold in folds:
        print('fold = %d ' % fold)
        for kernel in kernels:
            print('----- KERNEL = %s -----' % kernel)
            svc = svm.SVC(C=1, kernel=kernel)
            # Same estimator, three preprocessing variants, original order.
            for dataset in (data, datamms, datastdsc):
                testpredict, testtarget = cross_val_pred2ict(svc,
                                                             dataset,
                                                             target,
                                                             cv=fold,
                                                             n_jobs=-1)
                print_scores(testpredict, testtarget)
示例#7
0
def runforest(data, target):
    """Cross-validate a random forest for each configured fold / tree count.

    Predictions are gathered with the project's cross_val_pred2ict helper
    and the per-fold scores printed via print_scores.
    """
    folds = [10]
    estimators = [100]
    print("------------ RANDOM FOREST  ------------")

    for fold in folds:

        print('fold = %d ' % fold)
        for n_trees in estimators:
            print('estimators = %d ' % n_trees)
            forest = RandomForestClassifier(n_estimators=n_trees)

            splitter = StratifiedKFold(n_splits=fold, random_state=5)
            n_splits = splitter.get_n_splits(data, target)

            predictions, targets = cross_val_pred2ict(forest,
                                                      data,
                                                      target,
                                                      cv=n_splits,
                                                      n_jobs=-1)

            print_scores(predictions, targets)
示例#8
0
def runbaggingtree(data, target):
    """Cross-validate a bagging ensemble of k-NN classifiers.

    Bug fixed: cv was hard-coded to 10 even though 'fold = 3' was printed
    and a StratifiedKFold(n_splits=fold) was built and then discarded; the
    CV now honours the configured fold count (the unused skf was removed).
    """
    folds = [3]
    depths = [5]
    estimators = [50]
    for fold in folds:

        print('fold = %d ' % fold)

        for depth in depths:
            # NOTE(review): depth is printed but never used -- the base
            # estimator is k-NN, which has no depth parameter. Confirm
            # whether a tree base estimator was intended.
            for estimator in estimators:

                print('depth = %d ' % depth)
                print('estimators = %d ' % estimator)

                bagging = BaggingClassifier(KNeighborsClassifier(),
                                            n_estimators=estimator)
                testpredict, testtarget = cross_val_pred2ict(bagging,
                                                             data,
                                                             target,
                                                             cv=fold,
                                                             n_jobs=-1)

                print_scores(testpredict, testtarget)
示例#9
0
    # Fragment of a benchmarking routine (enclosing def is outside this view):
    # CV-evaluates each classifier in `clfs` several times, averages the
    # scores, and appends them as rows to LaTeX tables.
    # NOTE(review): `data` appears to hold a class/dataset label here, not a
    # feature matrix (it is %s-formatted and seeds each row) -- confirm with
    # the caller.
    print('Klasa: %s' % data)
    importdata.print_info(db.target)
    rows = []
    for i in range(5):
        rows.append([data])

    # computations for each classifier
    for clf in clfs:
        scores = []
        # repeat the classification `iterations` times
        for iteration in range(iterations):
            clf_ = clone(clf)
            # cross-validation on a fresh clone so state never leaks
            testpredict, testtarget = cross_val_pred2ict(clf_,
                                                         db.data,
                                                         db.target,
                                                         cv=folds,
                                                         n_jobs=-1)
            scores.append(accsespf1g(testpredict, testtarget))
            print(str(clf))
            print_scores(testpredict, testtarget)
        # average the repeated results
        avgscores = avgaccsespf1g(scores)
        to_decimal = print_to_latex_two_decimal(avgscores)
        # one averaged value per table row (reuses loop name `i` from above)
        for i, score in enumerate(to_decimal):
            rows[i].append(score)
    for table, row in zip(tables, rows):
        print(row)
        table.add_row(row)
        table.add_hline()
示例#10
0
    def fit(self, X, y):
        """Fit the ensemble: CV-evaluate each configured estimator, pick a
        per-class 'expert' by the configured comparison function, then fit
        all estimators in parallel.

        Parameters: X -- feature matrix; y -- 1-D class labels (multilabel /
        multi-output targets are rejected).
        Returns self.
        """
        if isinstance(y, np.ndarray) and len(y.shape) > 1 and y.shape[1] > 1:
            raise NotImplementedError('Multilabel and multi-output'
                                      ' classification is not supported.')

        if self.estimators is None or len(self.estimators) == 0:
            raise AttributeError('Invalid `estimators` attribute, `estimators`'
                                 ' should be a list of (string, estimator)'
                                 ' tuples')

        # clone the configured estimators so fitting never mutates them
        self.estimators_ = [
            clone(estimator) for _, estimator in self.estimators
        ]

        cv_predictions = []
        targets = []
        self.groups = np.unique(y)

        # cross-validated evaluation of each estimator
        for estimator in self.estimators_:
            testpredict, testtarget = cross_val_pred2ict(estimator,
                                                         X,
                                                         y,
                                                         cv=self.n_folds,
                                                         n_jobs=1)
            cv_predictions.append((testpredict))
            targets.append(testtarget)

        # select the expert estimator for each class
        for idx, (prediction, target) in enumerate(zip(cv_predictions,
                                                       targets)):

            # matrixes1: per-fold confusion matrices as computed;
            # matrixes2: same matrices with the class roles swapped
            matrixes1 = []
            matrixes2 = []
            for pred, tar in zip(prediction, target):
                matrixes1.append(simplefunctions.confusion_matrix(tar, pred))

            for matrix in matrixes1:
                matrixes2.append(
                    np.array([[matrix[1, 1], matrix[1, 0]],
                              [matrix[0, 1], matrix[0, 0]]]))

            class1 = getattr(simplefunctions, self.function_compare)(matrixes1)

            # ties are resolved in favour of the later estimator
            if class1 > self.max_rating[0]:
                self.max_rating[0] = class1
                self.experts[0] = (idx)
                self.g_mean[0] = simplefunctions.g_meantpfp(matrixes1)
            elif class1 == self.max_rating[0]:
                self.experts[0] = (idx)
                self.g_mean[0] = simplefunctions.g_meantpfp(matrixes1)

            class2 = getattr(simplefunctions, self.function_compare)(matrixes2)
            if class2 > self.max_rating[1]:
                self.max_rating[1] = class2
                self.experts[1] = idx
                # NOTE(review): g_mean computed from matrixes1 in the
                # matrixes2 branch -- possibly should be matrixes2; confirm.
                self.g_mean[1] = simplefunctions.g_meantpfp(matrixes1)
            # NOTE(review): this tie test uses class1 where class2 is
            # expected, so a class1 tie can overwrite the class-2 expert --
            # likely a bug; confirm before fixing.
            elif class1 == self.max_rating[1]:
                self.experts[1] = idx
                self.g_mean[1] = simplefunctions.g_meantpfp(matrixes1)

        # fit every estimator on the full data, in parallel
        self.estimators_ = Parallel(n_jobs=self.n_jobs)(
            delayed(_parallel_fit_estimator)(clone(clf), X, y)
            for _, clf in self.estimators)

        return self
示例#11
0
def _evaluate_clf_group(clf_defs, X, target):
    """CV-score each (name, estimator) pair on X and return one flat row.

    For every classifier: clone it, run the project's cross_val_pred2ict,
    print the scores, collect the LaTeX-formatted metrics, then append a
    two-decimal ROC AUC computed from cross-validated probabilities.
    Relies on the module-level `folds` just like the original inline code.
    """
    rows = []
    for name, prototype in clf_defs:
        print('Klasyfikator: %s' % name)
        clf_ = clone(prototype)

        # cross-validated scoring
        testpredict, testtarget = cross_val_pred2ict(clf_,
                                                     X,
                                                     target,
                                                     cv=folds,
                                                     n_jobs=-1)
        print_scores(testpredict, testtarget)
        row = []
        row.extend(print_to_latex_sespf1g(testpredict, testtarget))

        # ROC AUC from cross-validated class-1 probabilities
        testroc = cross_val_predict(clf_,
                                    X,
                                    target,
                                    cv=folds,
                                    n_jobs=-1,
                                    method='predict_proba')
        row.append(
            float("{0:.2f}".format(
                roc_auc_score(y_true=target, y_score=testroc[:, 1]))))
        # flat extend (not append) -- preserves the original row layout
        rows.extend(row)
    return rows


def cross_val_oversampling_before(data, target):
    """Baseline CV scores (no oversampling) for both classifier groups.

    NB/tree classifiers (clfs_normal) run on the raw data; scale-sensitive
    SVM/k-NN classifiers (clfs_stand) run on standardized data. Returns
    (rows_normal, rows_stand), each a flat list of scores.

    The two previously duplicated evaluation loops are now a single shared
    helper (_evaluate_clf_group); behavior and output order are unchanged.
    """
    print("Testowanie CV bez oversamplingu")
    # standardization for the scale-sensitive SVM / k-NN group
    stdsc = StandardScaler()
    datastdsc = stdsc.fit_transform(data)

    rows_normal = _evaluate_clf_group(clfs_normal, data, target)
    rows_stand = _evaluate_clf_group(clfs_stand, datastdsc, target)

    return rows_normal, rows_stand
示例#12
0
def runtree(data, target):
    """Compare CV protocols around SMOTEENN resampling.

    Four evaluations per (fold, depth): plain CV on raw data; CV on globally
    resampled data (optimistically biased); train-on-resampled / test-on-
    held-out split; and the correct protocol of resampling inside each CV
    fold, with ROC AUC printed for several variants.

    NOTE(review): despite the TREE banner, the CV classifier is GaussianNB
    and the per-fold one is KNeighborsClassifier -- looks like experiment
    leftovers; confirm intent.
    """

    sm = SMOTEENN()
    # stratified 90/10 hold-out used for the "na czesci" evaluation below
    X_train, X_test, y_train, y_test = train_test_split(data,
                                                        target,
                                                        test_size=0.10,
                                                        random_state=5,
                                                        stratify=target)
    print(y_test.size)
    print(np.bincount(y_test))
    # resample only the training part (fit_sample is the old imblearn API)
    X_resampled, y_resampled = sm.fit_sample(X_train, y_train)
    folds = [10]
    depths = [10]
    print("------------ TREE ------------")

    for fold in folds:
        print('fold = %d ' % fold)
        for depth in depths:
            print('depth = %d ' % depth)
            clf = GaussianNB()  # depth is printed but unused by GaussianNB
            skf = StratifiedKFold(n_splits=fold, random_state=5)

            # baseline: plain CV on the original data
            testpredict, testtarget = cross_val_pred2ict(clf,
                                                         data,
                                                         target,
                                                         cv=fold,
                                                         n_jobs=-1)

            print_scores(testpredict, testtarget)
            print('smotens - przecenione')
            # CV on globally resampled data -- overestimated scores, since
            # synthetic samples leak between train and test folds
            testpredict, testtarget = cross_val_pred2ict(clf,
                                                         X_resampled,
                                                         y_resampled,
                                                         cv=fold,
                                                         n_jobs=-1)
            print_scores(testpredict, testtarget)

            # ROC AUC on the raw data
            testpredict = cross_val_predict(clf,
                                            data,
                                            target,
                                            cv=fold,
                                            n_jobs=-1,
                                            method='predict_proba')
            print(roc_auc_score(y_true=target, y_score=testpredict[:, 1]))

            # ROC AUC on the globally resampled data
            testpredict = cross_val_predict(clf,
                                            X_resampled,
                                            y_resampled,
                                            cv=fold,
                                            n_jobs=-1,
                                            method='predict_proba')
            print(roc_auc_score(y_true=y_resampled, y_score=testpredict[:, 1]))
            print('smotens - na czesci')
            # fit on resampled training split, score on untouched test split
            clf.fit(X_resampled, y_resampled)
            print_scores([clf.predict(X_test)], [y_test])
            # print(roc_auc_score(y_true=y_test, y_score=clf.predict_proba(X_test)[:, 1]))

            print('smotens - wlasciwe')
            # correct protocol: resample inside each CV fold
            clf_train = KNeighborsClassifier()
            predict_re = []
            targets_re = []
            proba_re = []
            target_proba_re = []

            for train_index, test_index in skf.split(data, target):
                clf_train_ = clone(clf_train)
                # resample only this fold's training portion
                data_re, tar_re = sm.fit_sample(data[train_index],
                                                target[train_index])
                clf_train_.fit(data_re, tar_re)
                predict_re.append(clf_train_.predict(data[test_index]))
                targets_re.append(target[test_index])
                proba_re.extend(
                    clf_train_.predict_proba(data[test_index])[:, 1])
                target_proba_re.extend(target[test_index])

            print_scores(predict_re, targets_re)
            # print(test_re)
            # print(proba_re)
            print(roc_auc_score(y_true=target_proba_re, y_score=proba_re))
示例#13
0
    def fit(self, X, y):
        """Fit the ensemble: build candidate classifiers (AdaBoost, Bagging,
        stacking, expert), rank every candidate by cross-validated
        `function_compare` on raw and SMOTE/NCR-resampled data, keep the
        three best, refit them on the full data, then fit the meta-classifier
        on their meta-features.

        Parameters: X -- feature matrix; y -- 1-D class labels (multilabel /
        multi-output targets are rejected).

        Fixes: the top-3 insertion previously corrupted max_g[0] in its
        second branch and never demoted 2nd place to 3rd in its first
        branch; the candidate index used true division (float index ->
        TypeError on Python 3) and now uses floor division.
        """
        if isinstance(y, np.ndarray) and len(y.shape) > 1 and y.shape[1] > 1:
            raise NotImplementedError('Multilabel and multi-output'
                                      ' classification is not supported.')

        if self.estimators is None or len(self.estimators) == 0:
            raise AttributeError('Invalid `estimators` attribute, `estimators`'
                                 ' should be a list of (string, estimator)'
                                 ' tuples')
        cv_predictions = []
        targets = []

        # clone the configured estimators so fitting never mutates them
        self.estimators_ = [
            clone(estimator) for _, estimator in self.estimators
        ]

        # add AdaBoost-wrapped candidates
        for clf in self.estimators_ada:
            self.clfs.append(
                AdaBoostClassifier(clone(clf), n_estimators=self.n_estimators))

        # add Bagging-wrapped candidates
        for clf in self.estimators_bag:
            self.clfs.append(
                BaggingClassifier(clone(clf),
                                  n_estimators=100,
                                  max_samples=0.9))

        self.clfs.append(
            StackingClassifier(classifiers=self.estimators_,
                               meta_classifier=LogisticRegression()))
        self.clfs.append(clf_expert(self.estimators))

        # cross-validated evaluation of every candidate on the raw data
        for clf in self.clfs:
            testpredict, testtarget = cross_val_pred2ict(clf,
                                                         X,
                                                         y,
                                                         cv=self.n_folds,
                                                         n_jobs=1)
            cv_predictions.append((testpredict))
            targets.append(testtarget)

        skf = StratifiedKFold(n_splits=2, random_state=self.random_st)

        # train and evaluate each candidate on SMOTE- and NCR-resampled folds
        for clf in self.clfs:
            for method, name in zip(self.methoda, self.name_met):
                metodaa = SMOTE(k_neighbors=3, random_state=self.random_st)
                metodaj = NeighbourhoodCleaningRule(
                    n_neighbors=3, random_state=self.random_st)

                predict_re = []
                targets_re = []
                for train_index, test_index in skf.split(X, y):

                    # resample only the training portion of the split
                    if method == 0:
                        data_re, tar_re = metodaa.fit_sample(
                            np.asarray(X[train_index]),
                            np.asarray(y[train_index]))
                    else:
                        data_re, tar_re = metodaj.fit_sample(
                            np.asarray(X[train_index]),
                            np.asarray(y[train_index]))

                    clf_ = clone(clf)

                    # train on the resampled fold
                    clf_.fit(data_re, tar_re)

                    # test on the untouched fold
                    predict_re.append(clf_.predict(X[test_index]))
                    targets_re.append(y[test_index])
                cv_predictions.append((predict_re))
                targets.append(targets_re)

        # select the three best experts by function_compare
        for idx, (prediction, target) in enumerate(zip(cv_predictions,
                                                       targets)):

            matrixes1 = []
            matrixes2 = []
            for pred, tar in zip(prediction, target):
                matrixes1.append(simplefunctions.confusion_matrix(tar, pred))
            for matrix in matrixes1:
                matrixes2.append(
                    np.array([[matrix[1, 1], matrix[1, 0]],
                              [matrix[0, 1], matrix[0, 0]]]))
            fun_cmp = getattr(simplefunctions,
                              self.function_compare)(matrixes1)

            # top-3 insertion sort on (max_g, clf_id); each branch demotes
            # the displaced entries before writing the new score
            if fun_cmp > self.max_g[0]:
                self.clf_id[2] = self.clf_id[1]
                self.clf_id[1] = self.clf_id[0]
                self.clf_id[0] = idx
                self.max_g[2] = self.max_g[1]
                self.max_g[1] = self.max_g[0]
                self.max_g[0] = fun_cmp
            elif fun_cmp > self.max_g[1]:
                self.clf_id[2] = self.clf_id[1]
                self.clf_id[1] = idx
                self.max_g[2] = self.max_g[1]
                self.max_g[1] = fun_cmp  # was max_g[0]: corrupted the best score
            elif fun_cmp > self.max_g[2]:
                self.clf_id[2] = idx
                self.max_g[2] = fun_cmp

        # refit the selected experts on the full data, resampling first when
        # the expert was evaluated on a resampled variant
        for clf_id in self.clf_id:
            if clf_id > len(self.estimators_ada) + len(self.estimators_bag):
                # NOTE(review): the offset 7 and the parity test assume a
                # fixed candidate layout -- confirm against the constructor.
                if clf_id % 2 == 0:
                    met = self.methods[0]
                    data_re, tar_re = met.fit_sample(X, y)
                    # floor division keeps the index an int on Python 3
                    clf_ = clone(self.clfs[(clf_id - 7) // 2])
                    self.ensemble_.append(clf_.fit(data_re, tar_re))
                else:
                    met = self.methods[1]
                    data_re, tar_re = met.fit_sample(X, y)
                    clf_ = clone(self.clfs[(clf_id - 7) // 2])
                    self.ensemble_.append(clf_.fit(data_re, tar_re))
            else:
                clf_ = clone(self.clfs[clf_id])
                self.ensemble_.append(clf_.fit(X, y))

        meta_features = self._predict_meta_features(X)
        self.meta_clf_.fit(meta_features, y)