class LightGBM(BaseAlgo):
    """Thin wrapper adapting pylightgbm's GBMClassifier to the BaseAlgo API.

    Instance params are the class-level defaults overridden by the
    user-supplied mapping passed to ``__init__``.
    """

    # Defaults merged under user params; 'exec_path' must point at the
    # lightgbm executable on this machine.
    default_params = {'exec_path': 'lightgbm', 'num_threads': 4}

    def __init__(self, params):
        # Copy first so the shared class-level dict is never mutated.
        self.params = self.default_params.copy()
        self.params.update(params)

    def fit(self, X_train, y_train, X_eval=None, y_eval=None, seed=42,
            feature_names=None, eval_func=None, **kwa):
        """Train a fresh GBMClassifier; uses (X_eval, y_eval) as the eval
        set when provided (enables early stopping in the backend).

        `seed` derives both bagging and feature-fraction seeds so runs
        are reproducible. `feature_names`/`eval_func`/**kwa are accepted
        for interface compatibility but unused here.
        """
        params = self.params.copy()
        params['bagging_seed'] = seed
        params['feature_fraction_seed'] = seed + 3
        self.model = GBMClassifier(**params)
        if X_eval is None:
            self.model.fit(X_train, y_train)
        else:
            self.model.fit(X_train, y_train, test_data=[(X_eval, y_eval)])

    def predict(self, X):
        """Return predicted class labels."""
        return self.model.predict(X)

    def predict_proba(self, X):
        """Return class probabilities.

        BUG FIX: previously delegated to ``predict`` and therefore
        returned hard labels instead of probabilities.
        """
        return self.model.predict_proba(X)
def test_multiclass(self):
    """Multiclass training on the fixture data should exceed 80% accuracy."""
    labels = Ymulti.argmax(-1)
    model = GBMClassifier(
        exec_path=path_to_exec,
        min_data_in_leaf=1,
        learning_rate=0.1,
        num_leaves=5,
        num_class=n_classes,
        metric='multi_logloss',
        application='multiclass',
        num_iterations=100,
    )
    # Fit twice on purpose: once without and once with an eval set,
    # exercising both code paths of GBMClassifier.fit.
    model.fit(Xmulti, labels)
    model.fit(Xmulti, labels, test_data=[(Xmulti, labels)])
    score = metrics.accuracy_score(labels, model.predict(Xmulti))
    assert score > 0.8
num_iterations=1000, min_data_in_leaf=1, num_leaves=10,
    metric='binary_error', learning_rate=0.1,
    early_stopping_round=10, verbose=False)

# Per-fold bookkeeping for the cross-validation loop below.
best_rounds = []
scores = []

# NOTE(review): `skf`, `X`, `Y`, `clf`, `metrics` and `np` are defined
# earlier in the file (not visible in this chunk); `skf` is presumably a
# StratifiedKFold instance (it exposes split() and n_splits) — confirm.
for i, (train_idx, valid_idx) in enumerate(skf.split(X, Y)):
    x_train = X[train_idx, :]
    y_train = Y[train_idx]
    x_valid = X[valid_idx, :]
    y_valid = Y[valid_idx]

    # Fit with the validation fold as eval set so early stopping can
    # determine the best boosting round for this fold.
    clf.fit(x_train, y_train, test_data=[(x_valid, y_valid)])
    best_round = clf.best_round
    best_rounds.append(best_round)

    y_pred = clf.predict(x_valid)
    score = metrics.accuracy_score(y_valid, y_pred)
    scores.append(score)

    print("Fold: [{}/{}]: Accuracy: {:.3f}, best round: {}".format(
        i + 1, skf.n_splits, score, best_round))

# Summary over all folds: mean accuracy and mean best round.
print("Average: accuracy: {:.3f}, best round: {}".format(
    np.mean(scores), int(np.mean(best_rounds))))
# Parameters
seed = 1337
nfolds = 5
test_size = 0.2
path_to_exec = "~/Documents/apps/LightGBM/lightgbm"

# Seed numpy's global RNG for reproducibility.
np.random.seed(seed)

# Synthetic binary-classification problem: 1000 samples, 100 features.
X, Y = datasets.make_classification(n_samples=1000, n_features=100,
                                    random_state=seed)
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    X, Y, test_size=test_size, random_state=seed)

# 'exec_path' is the path to lightgbm executable
gbm_params = dict(
    exec_path=path_to_exec,
    num_iterations=1000,
    learning_rate=0.01,
    min_data_in_leaf=1,
    num_leaves=5,
    metric='binary_error',
    early_stopping_round=20,
)
clf = GBMClassifier(**gbm_params)
clf.fit(x_train, y_train, test_data=[(x_test, y_test)])

# Class probabilities for the held-out set; hard labels via argmax.
y_prob = clf.predict_proba(x_test)
y_pred = y_prob.argmax(-1)

print("Log loss: ", metrics.log_loss(y_test, y_prob))
print("Accuracy: ", metrics.accuracy_score(y_test, y_pred))
print("Best round: ", clf.best_round)
bst1 = xgb.train(params, dtrain, params['n']) # ------------------------------------------------------------------ params = { 'exec_path': path_to_exec, 'num_iterations': 108, 'learning_rate': 0.079, 'num_leaves': 13, 'metric': 'binary_error', 'min_sum_hessian_in_leaf': 1, 'bagging_fraction': 0.642, 'bagging_freq': 1, 'verbose': 0 } bst2 = GBMClassifier(boosting_type='gbdt', **params) bst2.fit(X_train, y_train) # ------------------------------------------------------------------ params_est = { 'n_estimators': 300, 'loss': 'exponential', 'learning_rate': 0.08, 'subsample': 0.6910000000000001, 'min_samples_leaf': 340, 'max_features': 53, 'random_state': 1 } bst3 = GradientBoostingClassifier(**params_est) bst3.fit(X_train, y_train) # ------------------------------------------------------------------ from keras.callbacks import Callback as keras_clb random.seed(666)
# -*- coding: utf-8 -*-
"""
@author: Ardalan MEHRANI <*****@*****.**>
@brief: Minimal pylightgbm example — train a classifier on synthetic data
        and demonstrate that the sklearn-API model objects are picklable.
"""
import pickle
import numpy as np
from sklearn import datasets, metrics, model_selection
from pylightgbm.models import GBMClassifier

# Parameters
path_to_exec = "~/Documents/apps/LightGBM/lightgbm"

X, Y = datasets.make_classification(n_samples=1000, n_features=100,
                                    random_state=1337)

# 'exec_path' is the path to lightgbm executable
clf = GBMClassifier(exec_path=path_to_exec, verbose=False)
clf.fit(X, Y)
y_pred = clf.predict(X)
print("Accuracy: ", metrics.accuracy_score(Y, y_pred))

# The sklearn API models are picklable
print("Pickling sklearn API models")
# BUG FIX: the file objects from open() were never closed; context
# managers guarantee the dump is flushed and both handles are released.
with open("clf_gbm.pkl", "wb") as f:
    pickle.dump(clf, f)
with open("clf_gbm.pkl", "rb") as f:
    clf2 = pickle.load(f)
# Round-trip check: the reloaded model predicts identically.
print(np.allclose(clf.predict(X), clf2.predict(X)))
num_leaves=64,
    min_data_in_leaf=1,
    min_sum_hessian_in_leaf=1e-4,
    num_iterations=5000,
    num_threads=4,
    early_stopping_round=EARLY_STOPPING,
    drop_rate=0.0001,
    max_depth=6,
    lambda_l1=0.,
    lambda_l2=0.,
    max_bin=63,
    feature_fraction=1.0,
    #bagging_fraction=0.5,
    #bagging_freq=3,
    verbose=True)
# Train with (X_test, y_test) as the eval set so early stopping
# (EARLY_STOPPING rounds, set above) can trigger.
cl.fit(X_train, y_train, test_data=[(X_test, y_test)])
#</editor-fold>

#<editor-fold desc="Submission generation">
# NOTE(review): `cl` fits on X_test (the validation split) but scores
# `x_test` here — presumably the unlabeled submission set defined
# earlier in the file; confirm they are distinct on purpose.
if MAKE_SUBMISSION:
    print('Computing submission probabilities...')
    # Column 1 = probability of the positive class.
    y_submission = cl.predict_proba(x_test)[:, 1]
    print('Store submission data')
    submission_filename = os.path.join(submission_folder, 'submission_lightgbm.dat')
    store_submission(y_submission, submission_filename)
    print(
        'Submission data have been stored in {}\n'.format(submission_filename))