Example #1
def test_parallel_classification():
    # Check parallel classification.
    rng = check_random_state(0)

    # Classification
    X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, random_state=rng)

    ensemble = BaggingClassifier(DecisionTreeClassifier(), n_jobs=3, random_state=0).fit(X_train, y_train)

    # predict_proba
    ensemble.set_params(n_jobs=1)
    y1 = ensemble.predict_proba(X_test)
    ensemble.set_params(n_jobs=2)
    y2 = ensemble.predict_proba(X_test)
    assert_array_almost_equal(y1, y2)

    ensemble = BaggingClassifier(DecisionTreeClassifier(), n_jobs=1, random_state=0).fit(X_train, y_train)

    y3 = ensemble.predict_proba(X_test)
    assert_array_almost_equal(y1, y3)

    # decision_function
    ensemble = BaggingClassifier(SVC(decision_function_shape="ovr"), n_jobs=3, random_state=0).fit(X_train, y_train)

    ensemble.set_params(n_jobs=1)
    decisions1 = ensemble.decision_function(X_test)
    ensemble.set_params(n_jobs=2)
    decisions2 = ensemble.decision_function(X_test)
    assert_array_almost_equal(decisions1, decisions2)

    ensemble = BaggingClassifier(SVC(decision_function_shape="ovr"), n_jobs=1, random_state=0).fit(X_train, y_train)

    decisions3 = ensemble.decision_function(X_test)
    assert_array_almost_equal(decisions1, decisions3)
Example #2
def test_parallel_classification():
    # Check parallel classification.
    rng = check_random_state(0)

    # Classification
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)

    ensemble = BaggingClassifier(DecisionTreeClassifier(),
                                 n_jobs=3,
                                 random_state=0).fit(X_train, y_train)

    # predict_proba
    ensemble.set_params(n_jobs=1)
    y1 = ensemble.predict_proba(X_test)
    ensemble.set_params(n_jobs=2)
    y2 = ensemble.predict_proba(X_test)
    assert_array_almost_equal(y1, y2)

    ensemble = BaggingClassifier(DecisionTreeClassifier(),
                                 n_jobs=1,
                                 random_state=0).fit(X_train, y_train)

    y3 = ensemble.predict_proba(X_test)
    assert_array_almost_equal(y1, y3)

    # decision_function
    ensemble = BaggingClassifier(SVC(gamma='scale',
                                     decision_function_shape='ovr'),
                                 n_jobs=3,
                                 random_state=0).fit(X_train, y_train)

    ensemble.set_params(n_jobs=1)
    decisions1 = ensemble.decision_function(X_test)
    ensemble.set_params(n_jobs=2)
    decisions2 = ensemble.decision_function(X_test)
    assert_array_almost_equal(decisions1, decisions2)

    X_err = np.hstack((X_test, np.zeros((X_test.shape[0], 1))))
    assert_raise_message(
        ValueError, "Number of features of the model "
        "must match the input. Model n_features is {0} "
        "and input n_features is {1} "
        "".format(X_test.shape[1], X_err.shape[1]), ensemble.decision_function,
        X_err)

    ensemble = BaggingClassifier(SVC(gamma='scale',
                                     decision_function_shape='ovr'),
                                 n_jobs=1,
                                 random_state=0).fit(X_train, y_train)

    decisions3 = ensemble.decision_function(X_test)
    assert_array_almost_equal(decisions1, decisions3)
Example #3
def test_parallel_classification():
    # Check parallel classification.
    rng = check_random_state(0)

    # Classification
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)

    ensemble = BaggingClassifier(DecisionTreeClassifier(),
                                 n_jobs=3,
                                 random_state=0).fit(X_train, y_train)

    # predict_proba
    ensemble.set_params(n_jobs=1)
    y1 = ensemble.predict_proba(X_test)
    ensemble.set_params(n_jobs=2)
    y2 = ensemble.predict_proba(X_test)
    assert_array_almost_equal(y1, y2)

    ensemble = BaggingClassifier(DecisionTreeClassifier(),
                                 n_jobs=1,
                                 random_state=0).fit(X_train, y_train)

    y3 = ensemble.predict_proba(X_test)
    assert_array_almost_equal(y1, y3)

    # decision_function
    ensemble = BaggingClassifier(SVC(gamma='scale',
                                     decision_function_shape='ovr'),
                                 n_jobs=3,
                                 random_state=0).fit(X_train, y_train)

    ensemble.set_params(n_jobs=1)
    decisions1 = ensemble.decision_function(X_test)
    ensemble.set_params(n_jobs=2)
    decisions2 = ensemble.decision_function(X_test)
    assert_array_almost_equal(decisions1, decisions2)

    X_err = np.hstack((X_test, np.zeros((X_test.shape[0], 1))))
    assert_raise_message(ValueError, "Number of features of the model "
                         "must match the input. Model n_features is {0} "
                         "and input n_features is {1} "
                         "".format(X_test.shape[1], X_err.shape[1]),
                         ensemble.decision_function, X_err)

    ensemble = BaggingClassifier(SVC(gamma='scale',
                                     decision_function_shape='ovr'),
                                 n_jobs=1,
                                 random_state=0).fit(X_train, y_train)

    decisions3 = ensemble.decision_function(X_test)
    assert_array_almost_equal(decisions1, decisions3)
Example #4
def test_parallel_classification():
    # Check parallel classification.
    rng = check_random_state(0)

    # Classification
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)

    ensemble = BaggingClassifier(DecisionTreeClassifier(),
                                 n_jobs=3,
                                 random_state=0).fit(X_train, y_train)

    # predict_proba
    ensemble.set_params(n_jobs=1)
    y1 = ensemble.predict_proba(X_test)
    ensemble.set_params(n_jobs=2)
    y2 = ensemble.predict_proba(X_test)
    assert_array_almost_equal(y1, y2)

    ensemble = BaggingClassifier(DecisionTreeClassifier(),
                                 n_jobs=1,
                                 random_state=0).fit(X_train, y_train)

    y3 = ensemble.predict_proba(X_test)
    assert_array_almost_equal(y1, y3)

    # decision_function
    ensemble = BaggingClassifier(SVC(decision_function_shape='ovr'),
                                 n_jobs=3,
                                 random_state=0).fit(X_train, y_train)

    ensemble.set_params(n_jobs=1)
    decisions1 = ensemble.decision_function(X_test)
    ensemble.set_params(n_jobs=2)
    decisions2 = ensemble.decision_function(X_test)
    assert_array_almost_equal(decisions1, decisions2)

    X_err = np.hstack((X_test, np.zeros((X_test.shape[0], 1))))
    err_msg = (f"Number of features of the model must match the input. Model "
               f"n_features is {X_test.shape[1]} and input n_features is "
               f"{X_err.shape[1]} ")
    with pytest.raises(ValueError, match=err_msg):
        ensemble.decision_function(X_err)

    ensemble = BaggingClassifier(SVC(decision_function_shape='ovr'),
                                 n_jobs=1,
                                 random_state=0).fit(X_train, y_train)

    decisions3 = ensemble.decision_function(X_test)
    assert_array_almost_equal(decisions1, decisions3)
Example #5
class GrantModel():
    def train(self, features, labels):
        cores = 8
        self.vectorizer = DictVectorizer(sparse=True)
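        # Bag LinearSVC over `cores` estimators; max_samples=1.0/cores gives
        # each estimator roughly one core's share of the data, so the
        # otherwise single-threaded LinearSVC trains in parallel.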
        self.sentiment = BaggingClassifier(svm.LinearSVC(),
                                           max_samples=1.0 / cores,
                                           n_estimators=cores,
                                           n_jobs=cores)

        train_vec = self.vectorizer.fit_transform(features)
        self.sentiment.fit(train_vec, labels)

    def extract_features(self, tweet):
        feats = {}
        tweet = tweet.split(' ')
        feats['NUMCAPS'] = 0
        for j in range(len(tweet)):
            word = tweet[j]
            if len(word) > 0 and word[0] != '@':
                feats['WORD=' + word.lower()] = 1
                feats['NUMCAPS'] += sum(1 for char in word if char.isupper())
        return feats

    def predict(self, newTweetTexts):
        feats = []
        for text in newTweetTexts:
            feats.append(self.extract_features(text))

        feat_vec = self.vectorizer.transform(feats)

        return self.sentiment.decision_function(feat_vec)
Example #6
def othertest(precisionk, draw=False):
    cleandata = pd.read_csv("./data/cleaned_knnimpute.csv")
    cleandata.index = cleandata.sid
    cleandata = cleandata.drop('sid', axis=1)
    # Keep only the rows whose target Y is present.
    cleandata = cleandata[~np.isnan(cleandata['Y'])]
    # After c is chosen, use this to draw the AUC plot.
    train_id, test_id = train_test_split(cleandata.index,
                                         test_size=0.2)  # test_ratio = 0.2
    train = cleandata.loc[train_id]
    test = cleandata.loc[test_id]
    coltest = list(precisionCol(train, precisionk))
    coltest.append('Y')
    train = train[coltest]
    test = test[coltest]
    model = BaggingClassifier(base_estimator=linear_model.LogisticRegression(),
                              n_estimators=100,
                              max_features=200,
                              n_jobs=-1)
    model.fit(train.drop('Y', axis=1), train['Y'])
    fpr, tpr, thresholds = roc_curve(
        test['Y'],
        model.predict_proba(test.drop('Y', axis=1))[:, 1])
    print(auc(fpr, tpr))
    if draw:
        plotAUC(test['Y'], model.decision_function(test.drop('Y', axis=1)),
                'Bagged Logistic Regression')
        plt.savefig("testnorm_randomforest.png", dpi=120)
Example #7
class BaggingClassifierImpl():
    def __init__(self, base_estimator=None, n_estimators=10, max_samples=1.0, max_features=1.0, bootstrap=True, bootstrap_features=False, oob_score=False, warm_start=False, n_jobs=None, random_state=None, verbose=0):
        self._hyperparams = {
            'base_estimator': make_sklearn_compat(base_estimator),
            'n_estimators': n_estimators,
            'max_samples': max_samples,
            'max_features': max_features,
            'bootstrap': bootstrap,
            'bootstrap_features': bootstrap_features,
            'oob_score': oob_score,
            'warm_start': warm_start,
            'n_jobs': n_jobs,
            'random_state': random_state,
            'verbose': verbose}
        self._wrapped_model = SKLModel(**self._hyperparams)

    def fit(self, X, y=None):
        if (y is not None):
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)

    def predict_proba(self, X):
        return self._wrapped_model.predict_proba(X)

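    # decision_function simply delegates to the wrapped BaggingClassifier,
    # which in turn requires the base estimator to expose decision_function.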
    def decision_function(self, X):
        return self._wrapped_model.decision_function(X)
Example #8
def test_parallel_classification():
    # Check parallel classification.
    rng = check_random_state(0)

    # Classification
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)

    ensemble = BaggingClassifier(DecisionTreeClassifier(),
                                 n_jobs=3,
                                 random_state=0).fit(X_train, y_train)

    # predict_proba
    ensemble.set_params(n_jobs=1)
    y1 = ensemble.predict_proba(X_test)
    ensemble.set_params(n_jobs=2)
    y2 = ensemble.predict_proba(X_test)
    assert_array_almost_equal(y1, y2)

    ensemble = BaggingClassifier(DecisionTreeClassifier(),
                                 n_jobs=1,
                                 random_state=0).fit(X_train, y_train)

    y3 = ensemble.predict_proba(X_test)
    assert_array_almost_equal(y1, y3)

    # decision_function
    ensemble = BaggingClassifier(SVC(decision_function_shape='ovr'),
                                 n_jobs=3,
                                 random_state=0).fit(X_train, y_train)

    ensemble.set_params(n_jobs=1)
    decisions1 = ensemble.decision_function(X_test)
    ensemble.set_params(n_jobs=2)
    decisions2 = ensemble.decision_function(X_test)
    assert_array_almost_equal(decisions1, decisions2)

    ensemble = BaggingClassifier(SVC(decision_function_shape='ovr'),
                                 n_jobs=1,
                                 random_state=0).fit(X_train, y_train)

    decisions3 = ensemble.decision_function(X_test)
    assert_array_almost_equal(decisions1, decisions3)
Example #9
class ensemble:
    """

    """
    def __init__(self, X=0, labels=0, name='linear', rand=42):
        self.X = X
        self.labels = labels
        self.name = name
        self.model = []
        self.rand = rand

    def bagging(self, oob_val=False):
        from sklearn.ensemble import BaggingClassifier
        from sklearn.tree import DecisionTreeClassifier
        self.model = BaggingClassifier(DecisionTreeClassifier(),
                                       n_estimators=500,
                                       max_samples=100,
                                       bootstrap=True,
                                       n_jobs=-1,
                                       oob_score=oob_val)

        return (self.model)

    def bag_rand_forest(self):
        bag_clf = BaggingClassifier(DecisionTreeClassifier(max_features="auto",
                                                           max_leaf_nodes=16),
                                    n_estimators=500,
                                    max_samples=1.0,
                                    bootstrap=True,
                                    n_jobs=-1)
        return bag_clf

    def oob_score(self):
        val = self.model.oob_score_
        return (val)

    def predictor(self, predict_val):
        return_val = self.model.predict(predict_val)
        return (return_val)

    def predict_percent(self, predict_val):
        return self.model.predict_proba(predict_val)

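    # Note: BaggingClassifier only exposes decision_function when its base
    # estimator has one; DecisionTreeClassifier does not, so this raises
    # AttributeError for the tree ensembles built above.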
    def predict_scores(self, predict_val):
        scores = self.model.decision_function(predict_val)
        return (scores)

    def accuracy(self, x_test, y_test):
        from sklearn.metrics import accuracy_score
        self.model.fit(self.X, self.labels)
        y_pred = self.model.predict(x_test)
        return accuracy_score(y_test, y_pred)
Example #10
class SVM(Model):

    def __init__(self, *args, **kwargs):
        self.clf = BaggingClassifier(LinearSVC(penalty='l1', dual=False, tol=1e-7), n_jobs=-1)

    def train(self, x, y):
        self.clf.fit(x, y)

    def predict(self, x):
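        # Rank classes by their decision_function margins: the top margin is
        # the chosen action, the runner-up the fallback action.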
        pred = self.clf.decision_function(x)
        action = ACTIONS[self.clf.classes_[np.argmax(pred)]]
        alter_action = ACTIONS[self.clf.classes_[np.argsort(pred).squeeze()[-2]]]
        return action, alter_action
Example #11
class BaggedDecisionTreeClassifier():
    def __init__(self,
                 n_estimators=20,
                 bootstrap=True,
                 bootstrap_features=False,
                 oob_score=False,
                 max_depth=None,
                 min_samples_leaf=20,
                 warm_start=False,
                 n_jobs=None,
                 early_stopping='auto',
                 verbose=0,
                 random_state=None):
        self.tree = DecisionTreeClassifier(max_depth=max_depth,
                                           min_samples_leaf=min_samples_leaf)
        self.BagDT = BaggingClassifier(base_estimator=self.tree,
                                       n_estimators=n_estimators,
                                       bootstrap=bootstrap,
                                       bootstrap_features=bootstrap_features,
                                       oob_score=oob_score,
                                       warm_start=warm_start,
                                       n_jobs=n_jobs,
                                       random_state=random_state,
                                       verbose=verbose)

    def decision_function(self, X):
        return self.BagDT.decision_function(X)

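    # Note: fit returns the inner BaggingClassifier rather than self, so
    # chained calls operate on the raw sklearn estimator.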
    def fit(self, X, y, sample_weight=None):
        self.BagDT.fit(X, y, sample_weight=sample_weight)
        return self.BagDT

    def get_params(self, deep=True):
        return self.BagDT.get_params(deep=deep)

    def predict(self, X):
        return self.BagDT.predict(X)

    def predict_log_proba(self, X):
        return self.BagDT.predict_log_proba(X)

    def predict_proba(self, X):
        return self.BagDT.predict_proba(X)

    def score(self, X, y, sample_weight=None):
        return self.BagDT.score(X, y, sample_weight=sample_weight)

    def set_params(self, **params):
        return self.BagDT.set_params(**params)
Example #12
def bagging(X_train, y_train, cart, X_test, y_test):
    seed = 7
    num_trees = 100
    model = BaggingClassifier(base_estimator=cart,
                              n_estimators=num_trees,
                              random_state=seed).fit(X_train, y_train)
    results = model.score(X_test, y_test)
    y_df = model.decision_function(X_test)
    y_pred = model.predict(X_test)
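    # precision_recall_curve needs continuous scores, so rank the positives
    # with decision_function rather than with hard predictions.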
    precisions, recall, t = precision_recall_curve(y_test, y_df, pos_label=1)
    print(precisions[:10], recall[:10], t[:10])
    precision = precisions[0]
    confmat = confusion_matrix(y_test, y_pred)
    return results, precision, confmat
Example #13
class BaggingClassifierImpl:
    def __init__(
        self,
        base_estimator=None,
        n_estimators=10,
        max_samples=1.0,
        max_features=1.0,
        bootstrap=True,
        bootstrap_features=False,
        oob_score=False,
        warm_start=False,
        n_jobs=None,
        random_state=None,
        verbose=0,
    ):
        self._hyperparams = {
            "base_estimator": make_sklearn_compat(base_estimator),
            "n_estimators": n_estimators,
            "max_samples": max_samples,
            "max_features": max_features,
            "bootstrap": bootstrap,
            "bootstrap_features": bootstrap_features,
            "oob_score": oob_score,
            "warm_start": warm_start,
            "n_jobs": n_jobs,
            "random_state": random_state,
            "verbose": verbose,
        }
        self._wrapped_model = SKLModel(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)

    def predict_proba(self, X):
        return self._wrapped_model.predict_proba(X)

    def decision_function(self, X):
        return self._wrapped_model.decision_function(X)
Example #14
class BaggingProcessor(Processor):
    def __init__(self,
                 name='bagging',
                 c=1.0,
                 keys_correspondences=DEFAULT_KEYS_CORRESPONDENCES):
        super(BaggingProcessor, self).__init__(name)
        self._model = BaggingClassifier(LinearSVC(C=c),
                                        max_samples=0.5,
                                        max_features=0.8)
        self.keys_correspondences = keys_correspondences

    def to_dict(self):
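        # Pickle the fitted model so the processor can round-trip through a
        # plain dict (see from_dict below).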
        output_dict = {
            'data': np.array(pickle.dumps(self._model)),
        }
        return output_dict

    def from_dict(self, input_dict):
        self._model = pickle.loads(input_dict['data'])

    def fit(self, x):
        labels_key = self.keys_correspondences["labels_key"]
        features_key = self.keys_correspondences["features_key"]

        labels = copy.deepcopy(x[labels_key])
        labels[labels > 0] = 1
        self._model.fit(x[features_key], labels)

    def run(self, x):
        features_key = self.keys_correspondences["features_key"]
        scores_key = self.keys_correspondences["scores_key"]
        output_type_key = self.keys_correspondences["output_type_key"]

        x[scores_key] = self._model.decision_function(x[features_key])
        x[output_type_key] = ProcessorOutputType.LIKELIHOOD
        return x

    def __str__(self):
        description = {'type': 'Bagging Processor', 'name': self.name}
        return str(description)
Example #15
            elif j == 3:
                clf = BaggingClassifier(base_estimator=MLPClassifier(
                    hidden_layer_sizes=(100,), activation='relu', solver='adam',
                    batch_size=128, alpha=1e-4, learning_rate_init=1e-3,
                    learning_rate='adaptive', tol=1e-4, max_iter=200),
                                        n_estimators=10,
                                        max_samples=0.5,
                                        max_features=0.5)
            elif j == 4:
                clf = BaggingClassifier(base_estimator=LinearSVC(
                    penalty='l2', random_state=0, tol=1e-4),
                                        n_estimators=10,
                                        max_samples=0.5,
                                        max_features=0.5)
            skf = StratifiedKFold(n_splits=10)
            skf_accuracy = []
            for train, test in skf.split(X, y):
                clf.fit(X[train], y[train])
                if n_classes.size < 3:
                    skf_accuracy.append(
                        roc_auc_score(y[test],
                                      clf.predict_proba(X[test])[:,
                                                                 1] if j != 4
                                      else clf.decision_function(X[test]),
                                      average='micro'))
                else:
                    ytest_one_hot = label_binarize(y[test], n_classes)
                    skf_accuracy.append(
                        roc_auc_score(ytest_one_hot,
                                      clf.predict_proba(X[test]) if j != 4 else
                                      clf.decision_function(X[test]),
                                      average='micro'))
            accuracy = np.mean(skf_accuracy)
            print(cl[j], 'AUC: %.3f' % accuracy)
Example #16
# model = joblib.load("%s/svm_model" % training_set_path)

for test_set_path in [
        "./our_dataset/testing_set/LH_Protein/structures/",
        "./our_dataset/testing_set/LH_NonProtein/structures/",
        "./our_dataset/validation_set/structures/",
        "./our_dataset/homology/LH_Protein/structures/",
        "./our_dataset/homology/LH_NonProtein/structures/"
]:
    print("Importing descriptors from the testing set %s." % test_set_path)
    X_test, y_test, labels_test = loadSamples(
        test_set_path, "*_ab_test_descriptors_N5.txt",
        len("_ab_test_descriptors_N5.txt"))
    print("Number of features: %d." % X_test.shape[-1])
    X_test_scale = scaler.transform(X_test.todense())

    print "Predicting the testing set %s." % test_set_path
    y_score = model.decision_function(X_test_scale)

    def get_indexes(x, xs):
        return [i for i, val in enumerate(xs) if val == x]

    pdb_ids = sorted(set(labels_test))
    for file_id in pdb_ids:
        pdb_id_indices = get_indexes(file_id, labels_test)
        with open("%s/%s_ab_patch_score.txt" % (test_set_path, file_id),
                  "w") as out_scores:
            for p in y_score[pdb_id_indices]:
                out_scores.write("%f\n" % p)
Example #17
def test_parallel():
    """Check parallel computations."""
    rng = check_random_state(0)

    # Classification
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)

    for n_jobs in [-1, 3]:
        ensemble = BaggingClassifier(DecisionTreeClassifier(),
                                     n_jobs=n_jobs,
                                     random_state=0).fit(X_train, y_train)

        # predict_proba
        ensemble.set_params(n_jobs=1)
        y1 = ensemble.predict_proba(X_test)
        ensemble.set_params(n_jobs=2)
        y2 = ensemble.predict_proba(X_test)
        assert_array_almost_equal(y1, y2)

        ensemble = BaggingClassifier(DecisionTreeClassifier(),
                                     n_jobs=1,
                                     random_state=0).fit(X_train, y_train)

        y3 = ensemble.predict_proba(X_test)
        assert_array_almost_equal(y1, y3)

        # decision_function
        ensemble = BaggingClassifier(SVC(), n_jobs=n_jobs,
                                     random_state=0).fit(X_train, y_train)

        ensemble.set_params(n_jobs=1)
        decisions1 = ensemble.decision_function(X_test)
        ensemble.set_params(n_jobs=2)
        decisions2 = ensemble.decision_function(X_test)
        assert_array_almost_equal(decisions1, decisions2)

        ensemble = BaggingClassifier(SVC(), n_jobs=1,
                                     random_state=0).fit(X_train, y_train)

        decisions3 = ensemble.decision_function(X_test)
        assert_array_almost_equal(decisions1, decisions3)

    # Regression
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    for n_jobs in [-1, 3]:
        ensemble = BaggingRegressor(DecisionTreeRegressor(),
                                    n_jobs=n_jobs,
                                    random_state=0).fit(X_train, y_train)

        ensemble.set_params(n_jobs=1)
        y1 = ensemble.predict(X_test)
        ensemble.set_params(n_jobs=2)
        y2 = ensemble.predict(X_test)
        assert_array_almost_equal(y1, y2)

        ensemble = BaggingRegressor(DecisionTreeRegressor(),
                                    n_jobs=1,
                                    random_state=0).fit(X_train, y_train)

        y3 = ensemble.predict(X_test)
        assert_array_almost_equal(y1, y3)
Example #18
def test_parallel():
    """Check parallel computations."""
    rng = check_random_state(0)

    # Classification
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)

    for n_jobs in [-1, 3]:
        ensemble = BaggingClassifier(DecisionTreeClassifier(),
                                     n_jobs=n_jobs,
                                     random_state=0).fit(X_train, y_train)

        # predict_proba
        ensemble.set_params(n_jobs=1)
        y1 = ensemble.predict_proba(X_test)
        ensemble.set_params(n_jobs=2)
        y2 = ensemble.predict_proba(X_test)
        assert_array_almost_equal(y1, y2)

        ensemble = BaggingClassifier(DecisionTreeClassifier(),
                                     n_jobs=1,
                                     random_state=0).fit(X_train, y_train)

        y3 = ensemble.predict_proba(X_test)
        assert_array_almost_equal(y1, y3)

        # decision_function
        ensemble = BaggingClassifier(SVC(),
                                     n_jobs=n_jobs,
                                     random_state=0).fit(X_train, y_train)

        ensemble.set_params(n_jobs=1)
        decisions1 = ensemble.decision_function(X_test)
        ensemble.set_params(n_jobs=2)
        decisions2 = ensemble.decision_function(X_test)
        assert_array_almost_equal(decisions1, decisions2)

        ensemble = BaggingClassifier(SVC(),
                                     n_jobs=1,
                                     random_state=0).fit(X_train, y_train)

        decisions3 = ensemble.decision_function(X_test)
        assert_array_almost_equal(decisions1, decisions3)

    # Regression
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    for n_jobs in [-1, 3]:
        ensemble = BaggingRegressor(DecisionTreeRegressor(),
                                    n_jobs=n_jobs,
                                    random_state=0).fit(X_train, y_train)

        ensemble.set_params(n_jobs=1)
        y1 = ensemble.predict(X_test)
        ensemble.set_params(n_jobs=2)
        y2 = ensemble.predict(X_test)
        assert_array_almost_equal(y1, y2)

        ensemble = BaggingRegressor(DecisionTreeRegressor(),
                                    n_jobs=1,
                                    random_state=0).fit(X_train, y_train)

        y3 = ensemble.predict(X_test)
        assert_array_almost_equal(y1, y3)
Example #19
class HistRandomForestClassifier():
    def __init__(self,
                 loss='auto',
                 max_leaf_nodes=31,
                 max_depth=None,
                 min_samples_leaf=20,
                 l2_regularization=0,
                 max_bins=255,
                 n_estimators=20,
                 max_samples=1.0,
                 bootstrap=True,
                 bootstrap_features=False,
                 oob_score=False,
                 categorical_features=None,
                 monotonic_cst=None,
                 warm_start=False,
                 n_jobs=None,
                 early_stopping='auto',
                 scoring='loss',
                 validation_fraction=0.1,
                 n_iter_no_change=10,
                 tol=1e-7,
                 verbose=0,
                 random_state=None):
        self.loss = loss
        self.max_leaf_nodes = max_leaf_nodes
        self.max_depth = max_depth
        self.min_samples_leaf = min_samples_leaf
        self.l2_regularization = l2_regularization
        self.max_bins = max_bins
        self.n_estimators = n_estimators
        self.max_samples = max_samples
        self.bootstrap = bootstrap
        self.bootstrap_features = bootstrap_features
        self.oob_score = oob_score
        self.categorical_features = categorical_features
        self.monotonic_cst = monotonic_cst
        self.warm_start = warm_start
        self.n_jobs = n_jobs
        self.early_stopping = early_stopping
        self.scoring = scoring
        self.validation_fraction = validation_fraction
        self.n_iter_no_change = n_iter_no_change
        self.tol = tol
        self.verbose = verbose
        self.random_state = random_state

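        # One boosting iteration with learning_rate=1 reduces each base
        # estimator to a single histogram-based tree, so bagging them
        # approximates a random forest over binned features.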
        self.tree = HistGradientBoostingClassifier(
            loss=loss,
            learning_rate=1,
            max_iter=1,
            max_leaf_nodes=max_leaf_nodes,
            max_depth=max_depth,
            min_samples_leaf=min_samples_leaf,
            l2_regularization=l2_regularization,
            max_bins=max_bins,
            categorical_features=categorical_features,
            monotonic_cst=monotonic_cst,
            early_stopping=early_stopping,
            scoring=scoring,
            validation_fraction=validation_fraction,
            n_iter_no_change=n_iter_no_change,
            tol=tol,
            verbose=verbose,
            random_state=random_state)
        self.HistRF = BaggingClassifier(base_estimator=self.tree,
                                        n_estimators=n_estimators,
                                        max_samples=max_samples,
                                        bootstrap=bootstrap,
                                        bootstrap_features=bootstrap_features,
                                        oob_score=oob_score,
                                        warm_start=warm_start,
                                        n_jobs=n_jobs,
                                        random_state=random_state,
                                        verbose=verbose)

    def decision_function(self, X):
        return self.HistRF.decision_function(X)

    def fit(self, X, y, sample_weight=None):
        self.HistRF.fit(X, y, sample_weight=sample_weight)
        return self.HistRF

    def get_params(self, deep=True):
        return self.HistRF.get_params(deep=deep)

    def predict(self, X):
        return self.HistRF.predict(X)

    def predict_log_proba(self, X):
        return self.HistRF.predict_log_proba(X)

    def predict_proba(self, X):
        return self.HistRF.predict_proba(X)

    def score(self, X, y, sample_weight=None):
        return self.HistRF.score(X, y, sample_weight=sample_weight)

    def set_params(self, **params):
        return self.HistRF.set_params(**params)
Example #20
                    clf = BaggingClassifier(base_estimator=KNeighborsClassifier(n_neighbors=3),
                                            n_estimators=10,
                                            max_samples=0.5,
                                            max_features=0.5)
                elif j == 3:
                    clf = BaggingClassifier(base_estimator=MLPClassifier(hidden_layer_sizes=(100),
                                            activation='relu', solver='adam', batch_size=128,
                                            alpha=1e-4, learning_rate_init=1e-3, learning_rate='adaptive',
                                            tol=1e-4, max_iter=200),
                                            n_estimators=10,
                                            max_samples=0.5,
                                            max_features=0.5)
                elif j == 4:
                    clf = BaggingClassifier(base_estimator=LinearSVC(penalty='l2', random_state=0, tol=1e-4),
                                            n_estimators=10,
                                            max_samples=0.5,
                                            max_features=0.5)
                skf = StratifiedKFold(n_splits=10)
                skf_accuracy = []
                for train, test in skf.split(X, y):
                    clf.fit(X[train], y[train])
                    if n_classes.size < 3:
                        skf_accuracy.append(roc_auc_score(y[test], clf.predict_proba(X[test])[:, 1] if j != 4 else clf.decision_function(X[test]), average='micro'))
                    else:
                        ytest_one_hot = label_binarize(y[test], n_classes)
                        skf_accuracy.append(roc_auc_score(ytest_one_hot, clf.predict_proba(X[test]) if j != 4 else clf.decision_function(X[test]), average='micro'))
                accuracy = np.mean(skf_accuracy)
                of.write(f'{accuracy:.6f}|')
                print(f'{time.time() - start_time:.3f}s')
            of.write('\n')
Example #21
class _BaggingClassifierImpl:
    def __init__(
        self,
        base_estimator=None,
        n_estimators=10,
        *,
        max_samples=1.0,
        max_features=1.0,
        bootstrap=True,
        bootstrap_features=False,
        oob_score=False,
        warm_start=False,
        n_jobs=None,
        random_state=None,
        verbose=0,
    ):
        estimator_impl = base_estimator

        self._hyperparams = {
            "base_estimator": estimator_impl,
            "n_estimators": n_estimators,
            "max_samples": max_samples,
            "max_features": max_features,
            "bootstrap": bootstrap,
            "bootstrap_features": bootstrap_features,
            "oob_score": oob_score,
            "warm_start": warm_start,
            "n_jobs": n_jobs,
            "random_state": random_state,
            "verbose": verbose,
        }
        self._wrapped_model = SKLModel(**self._hyperparams)
        self._hyperparams["base_estimator"] = base_estimator

    def get_params(self, deep=True):
        out = self._wrapped_model.get_params(deep=deep)
        # we want to return the lale operator, not the underlying impl
        out["base_estimator"] = self._hyperparams["base_estimator"]
        return out

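    # BaggingClassifier hands bare numpy slices to its base estimators, so for
    # DataFrame input a FunctionTransformer that restores the column names is
    # prepended via lale's >> pipeline composition before fitting.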
    def fit(self, X, y, sample_weight=None):
        if isinstance(X, pd.DataFrame):
            feature_transformer = FunctionTransformer(
                func=lambda X_prime: pd.DataFrame(X_prime, columns=X.columns),
                inverse_func=None,
                check_inverse=False,
            )
            self._hyperparams["base_estimator"] = (
                feature_transformer >> self._hyperparams["base_estimator"])
            self._wrapped_model = SKLModel(**self._hyperparams)
        self._wrapped_model.fit(X, y, sample_weight)

        return self

    def predict(self, X, **predict_params):
        return self._wrapped_model.predict(X, **predict_params)

    def predict_proba(self, X):
        return self._wrapped_model.predict_proba(X)

    def predict_log_proba(self, X):
        return self._wrapped_model.predict_log_proba(X)

    def decision_function(self, X):
        return self._wrapped_model.decision_function(X)

    def score(self, X, y, sample_weight=None):
        return self._wrapped_model.score(X, y, sample_weight)