class LightGBM(BaseAlgo):
    """Thin wrapper that builds and fits a ``GBMClassifier`` from a param dict.

    ``default_params`` holds the class-wide defaults; every instance works on
    its own copy, so per-instance overrides never leak into the class dict.
    """

    default_params = {'exec_path': 'lightgbm', 'num_threads': 4}

    def __init__(self, params=None):
        """Merge caller overrides on top of a copy of ``default_params``.

        Args:
            params: Optional mapping of keyword arguments that are later
                forwarded to ``GBMClassifier``. Keys present here win over
                the defaults. ``None`` means "use the defaults only".
        """
        self.params = self.default_params.copy()

        if params is not None:
            self.params.update(params)

    def fit(self,
            X_train,
            y_train,
            X_eval=None,
            y_eval=None,
            seed=42,
            feature_names=None,
            eval_func=None,
            **kwa):
        """Create a fresh ``GBMClassifier`` and fit it on the training data.

        Args:
            X_train: Training features, passed unchanged to the model's
                ``fit``.
            y_train: Training targets.
            X_eval: Optional evaluation features. When not ``None``, the pair
                ``(X_eval, y_eval)`` is passed to the model as ``test_data``.
            y_eval: Evaluation targets paired with ``X_eval``.
            seed: Base seed. ``bagging_seed`` is set to ``seed`` and
                ``feature_fraction_seed`` to ``seed + 3``.
            feature_names: Accepted but not used by this implementation.
            eval_func: Accepted but not used by this implementation.
            **kwa: Extra keyword arguments are accepted and ignored.
        """
        # Work on a copy so the seeds chosen for this run are not persisted
        # in self.params.
        params = self.params.copy()
        params['bagging_seed'] = seed
        params['feature_fraction_seed'] = seed + 3

        self.model = GBMClassifier(**params)

        if X_eval is None:
            self.model.fit(X_train, y_train)
        else:
            self.model.fit(X_train, y_train, test_data=[(X_eval, y_eval)])

    def predict(self, X):
        """Return the fitted model's ``predict`` output for ``X``.

        ``fit`` must have been called first; ``self.model`` does not exist
        before that.
        """
        return self.model.predict(X)

    def predict_proba(self, X):
        """Return the fitted model's ``predict`` output for ``X``.

        NOTE(review): this delegates to ``model.predict``, exactly like
        ``predict`` above, so both methods return the same values. If callers
        expect class probabilities, the model's own ``predict_proba`` may be
        what was intended -- TODO confirm against the GBMClassifier API and
        the callers before changing the return shape.
        """
        return self.model.predict(X)
    def test_multiclass(self):
        """Fit a multiclass ``GBMClassifier`` and check training accuracy.

        The model is fitted twice -- first without and then with ``test_data``
        -- and the accuracy of its predictions on the training samples must
        exceed 0.8.
        """
        # Class indices are taken as the argmax over the last axis of Ymulti.
        labels = Ymulti.argmax(-1)

        settings = dict(exec_path=path_to_exec,
                        min_data_in_leaf=1,
                        learning_rate=0.1,
                        num_leaves=5,
                        num_class=n_classes,
                        metric='multi_logloss',
                        application='multiclass',
                        num_iterations=100)
        model = GBMClassifier(**settings)

        model.fit(Xmulti, labels)
        model.fit(Xmulti, labels, test_data=[(Xmulti, labels)])

        predicted = model.predict(Xmulti)
        assert metrics.accuracy_score(labels, predicted) > 0.8
示例#3
0
                    num_iterations=1000,
                    min_data_in_leaf=1,
                    num_leaves=10,
                    metric='binary_error',
                    learning_rate=0.1,
                    early_stopping_round=10,
                    verbose=False)

# Per-fold results: the best round reported by the classifier and the
# accuracy measured on the held-out fold.
best_rounds, scores = [], []

fold_report = "Fold: [{}/{}]: Accuracy: {:.3f}, best round: {}"
summary_report = "Average: accuracy: {:.3f}, best round: {}"

for i, (train_idx, valid_idx) in enumerate(skf.split(X, Y)):
    x_train, y_train = X[train_idx, :], Y[train_idx]
    x_valid, y_valid = X[valid_idx, :], Y[valid_idx]

    # The held-out fold is handed to fit() as test data; best_round is read
    # from the classifier right after fitting.
    clf.fit(x_train, y_train, test_data=[(x_valid, y_valid)])
    best_round = clf.best_round
    best_rounds.append(best_round)

    y_pred = clf.predict(x_valid)
    score = metrics.accuracy_score(y_valid, y_pred)
    scores.append(score)

    print(fold_report.format(i + 1, skf.n_splits, score, best_round))

# Summary over all folds; the mean best round is truncated to an int.
mean_score = np.mean(scores)
mean_round = int(np.mean(best_rounds))
print(summary_report.format(mean_score, mean_round))
示例#4
0
# -*- coding: utf-8 -*-
"""
@author: Ardalan MEHRANI <*****@*****.**>
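@brief: Fit a GBMClassifier on synthetic data, print its training accuracy,
        and check that the fitted model survives a pickle round-trip.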
"""
import pickle
import numpy as np
from sklearn import datasets, metrics, model_selection
from pylightgbm.models import GBMClassifier

# Parameters
path_to_exec = "~/Documents/apps/LightGBM/lightgbm"

# Synthetic binary classification problem with a fixed seed so runs are
# repeatable.
X, Y = datasets.make_classification(n_samples=1000,
                                    n_features=100,
                                    random_state=1337)

# 'exec_path' is the path to lightgbm executable
clf = GBMClassifier(exec_path=path_to_exec, verbose=False)

clf.fit(X, Y)

y_pred = clf.predict(X)

print("Accuracy: ", metrics.accuracy_score(Y, y_pred))

# The sklearn API models are picklable
print("Pickling sklearn API models")

# Use context managers so the file is flushed and closed before it is read
# back, and so no handle is left open afterwards.
with open("clf_gbm.pkl", "wb") as pkl_file:
    pickle.dump(clf, pkl_file)

with open("clf_gbm.pkl", "rb") as pkl_file:
    clf2 = pickle.load(pkl_file)

# The restored model must predict the same values as the original one.
print(np.allclose(clf.predict(X), clf2.predict(X)))