# Imports below assume the hep_ml package layout (hep_ml.losses,
# hep_ml.gradientboosting, hep_ml.commonutils); adjust if this test module
# already provides these names differently.
import numpy
from sklearn.metrics import roc_auc_score

from hep_ml import losses
from hep_ml.commonutils import generate_sample
from hep_ml.gradientboosting import UGradientBoostingClassifier, UGradientBoostingRegressor


def test_gradient_boosting(n_samples=1000):
    """Testing that gradient boosting works with different loss functions."""
    # Generating train and test samples whose label is correlated with the first variable.
    distance = 0.6
    testX, testY = generate_sample(n_samples, 10, distance)
    trainX, trainY = generate_sample(n_samples, 10, distance)
    # We will try to obtain predictions that are uniform along this variable.
    uniform_features = ['column0']

    loss1 = losses.LogLossFunction()
    loss2 = losses.AdaLossFunction()
    loss3 = losses.CompositeLossFunction()
    loss4 = losses.KnnAdaLossFunction(uniform_features=uniform_features, uniform_label=1)
    loss5 = losses.KnnAdaLossFunction(uniform_features=uniform_features, uniform_label=[0, 1])
    loss6bin = losses.BinFlatnessLossFunction(uniform_features, fl_coefficient=2., uniform_label=0)
    loss7bin = losses.BinFlatnessLossFunction(uniform_features, fl_coefficient=2., uniform_label=[0, 1])
    loss6knn = losses.KnnFlatnessLossFunction(uniform_features, fl_coefficient=2., uniform_label=1)
    loss7knn = losses.KnnFlatnessLossFunction(uniform_features, fl_coefficient=2., uniform_label=[0, 1])

    # Each classification loss should reach a reasonable test accuracy.
    for loss in [loss1, loss2, loss3, loss4, loss5, loss6bin, loss7bin, loss6knn, loss7knn]:
        clf = UGradientBoostingClassifier(loss=loss, min_samples_split=20, max_depth=5,
                                          learning_rate=0.2, subsample=0.7, n_estimators=25,
                                          train_features=None)
        clf.fit(trainX[:n_samples], trainY[:n_samples])
        result = clf.score(testX, testY)
        assert result >= 0.7, "The quality is too poor: {} with loss: {}".format(result, loss)

    # Regression losses. RankBoostLossFunction needs a query/request column,
    # so a fake one is added to both samples (the test sample needs it too,
    # since it is listed among the train features used at prediction time).
    trainX['fake_request'] = numpy.random.randint(0, 4, size=len(trainX))
    testX['fake_request'] = numpy.random.randint(0, 4, size=len(testX))
    for loss in [losses.MSELossFunction(),
                 losses.MAELossFunction(),
                 losses.RankBoostLossFunction(request_column='fake_request')]:
        print(loss)
        clf = UGradientBoostingRegressor(loss=loss, max_depth=3, n_estimators=50,
                                         learning_rate=0.01, subsample=0.5,
                                         train_features=list(trainX.columns[1:]))
        clf.fit(trainX, trainY)
        roc_auc = roc_auc_score(testY, clf.predict(testX))
        assert roc_auc >= 0.7, "The quality is too poor: {} with loss: {}".format(roc_auc, loss)
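

# A hypothetical helper (not part of hep_ml): a quick way to see what the
# flatness losses above aim for, by comparing the mean predicted signal
# probability across bins of one feature. The helper, its bin count, and the
# default feature name are illustrative assumptions, not part of the original test.
def mean_prediction_per_bin(clf, X, feature='column0', n_bins=5):
    values = numpy.asarray(X[feature])
    # Equal-population bin edges along the chosen feature.
    edges = numpy.percentile(values, numpy.linspace(0, 100, n_bins + 1))
    bin_indices = numpy.clip(numpy.searchsorted(edges, values) - 1, 0, n_bins - 1)
    proba = clf.predict_proba(X)[:, 1]
    # For a perfectly "flat" classifier, these per-bin means are all close.
    return numpy.array([proba[bin_indices == i].mean() for i in range(n_bins)])


# Minimal standalone entry point, as a sketch: fixes the RNG seed (an arbitrary
# choice, not in the original test) and runs the test once outside a test runner.
if __name__ == '__main__':
    numpy.random.seed(42)
    test_gradient_boosting(n_samples=1000)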