Пример #1
0
    def test_ensemble_supports_user_defined_transforms(self):
        """Check that a VotingRegressor works behind a user-defined transform.

        Two extra rows are added to a copy of ``test_df``.  Each regressor is
        fitted and scored on its own, then a pipeline that places a
        ``RangeFilter`` on ``c1`` in front of an averaging ensemble of fresh
        copies is fitted and scored on the same data.  The pipeline must
        return three rows (presumably the ``c1 == 11`` row is removed by the
        filter), each equal to the mean of the three standalone predictions.
        """
        extra_rows = pd.DataFrame({'c1': [9, 11], 'c2': [1, 1]})
        # pd.concat stands in for DataFrame.append, which was deprecated in
        # pandas 1.4 and removed in 2.0.  With default arguments it keeps
        # every frame's own index labels, exactly as append did.
        test2_df = pd.concat([test_df.copy(deep=True), extra_rows])

        # Baseline: every regressor trained and scored by itself.
        standalone = []
        for regressor_cls, kwargs in (
                (OrdinaryLeastSquaresRegressor, olsrArgs),
                (OnlineGradientDescentRegressor, ogdArgs),
                (LightGbmRegressor, lgbmArgs)):
            regressor = regressor_cls(**kwargs)
            regressor.fit(train_df)
            standalone.append(regressor.predict(test2_df))
        first, second, third = standalone

        # The ensemble receives fresh, unfitted copies of the same regressors.
        voters = [
            OrdinaryLeastSquaresRegressor(**olsrArgs),
            OnlineGradientDescentRegressor(**ogdArgs),
            LightGbmRegressor(**lgbmArgs),
        ]
        pipeline = Pipeline([
            RangeFilter(min=0, max=10, columns='c1'),
            VotingRegressor(estimators=voters, combiner='Average')
        ])
        pipeline.fit(train_df)
        result4 = pipeline.predict(test2_df)

        self.assertEqual(len(result4), 3)

        for row in range(3):
            expected = (first[row] + second[row] + third[row]) / 3
            self.assertAlmostEqual(
                expected, result4.loc[row, 'Score'], places=5)
Пример #2
0
    def test_lightgbmregressor(self):
        """Train a default LightGbmRegressor on airquality and check its R^2.

        The model is fitted on one part of the data set and scored on the
        rest; the resulting R^2 must be greater than 0.32 and less than 0.33.
        """
        # train_test_split below is given no random_state; seeding NumPy's
        # global generator is presumably what keeps the split repeatable.
        np.random.seed(0)

        df = get_dataset("airquality").as_df().fillna(0)
        # NOTE: fillna(0) has already replaced every missing value, so this
        # filter keeps all rows.  It is left in place because dropping rows
        # would change the data the bounds below were tuned against.
        df = df[df.Ozone.notnull()]

        features = df.loc[:, df.columns != 'Ozone']
        X_train, X_test, y_train, y_test = train_test_split(
            features, df['Ozone'])

        # Train a model and score
        ftree = LightGbmRegressor().fit(X_train, y_train)
        scores = ftree.predict(X_test)

        r2 = r2_score(y_test, scores)
        lower, upper = 0.32, 0.33
        assert_greater(r2, lower, "should be greater than %s" % lower)
        # The message used to say "sum", but the checked value is the R^2.
        assert_less(r2, upper, "should be less than %s" % upper)
Пример #3
0
    def test_ensemble_with_average_and_median_combiner(self):
        """Compare VotingRegressor output with hand-combined predictions.

        Each regressor is first fitted and scored alone on ``test_df``.  The
        first two rows produced by an ensemble of fresh copies are then
        checked against the mean of those scores for ``combiner='Average'``
        and against their middle value for ``combiner='Median'``.
        """
        recipes = (
            (OrdinaryLeastSquaresRegressor, olsrArgs),
            (OnlineGradientDescentRegressor, ogdArgs),
            (LightGbmRegressor, lgbmArgs),
        )

        # Baseline: every regressor trained and scored by itself.
        solo = []
        for regressor_cls, kwargs in recipes:
            regressor = regressor_cls(**kwargs)
            regressor.fit(train_df)
            solo.append(regressor.predict(test_df))
        ols_scores, ogd_scores, lgbm_scores = solo

        def ensemble_scores(combiner):
            # Fit a one-step pipeline around fresh, unfitted regressors and
            # return its predictions for test_df.
            voters = [cls(**kwargs) for cls, kwargs in recipes]
            voting = Pipeline(
                [VotingRegressor(estimators=voters, combiner=combiner)])
            voting.fit(train_df)
            return voting.predict(test_df)

        averaged = ensemble_scores('Average')
        expected_means = [
            (ols_scores[row] + ogd_scores[row] + lgbm_scores[row]) / 3
            for row in (0, 1)
        ]
        for row, mean in enumerate(expected_means):
            self.assertAlmostEqual(mean, averaged.loc[row, 'Score'], places=5)

        by_median = ensemble_scores('Median')
        # With three voters the median is the middle of the sorted scores.
        expected_medians = [
            sorted([ols_scores.loc[row],
                    ogd_scores.loc[row],
                    lgbm_scores.loc[row]])[1]
            for row in (0, 1)
        ]
        for row, median in enumerate(expected_medians):
            self.assertEqual(median, by_median.loc[row, 'Score'])
Пример #4
0
}

if show_individual_predictions:
    # Train every regressor by itself and print what it predicts.
    for regressor_cls, kwargs in (
            (OrdinaryLeastSquaresRegressor, olsrArgs),
            (OnlineGradientDescentRegressor, ogdArgs),
            (LightGbmRegressor, lgbmArgs)):
        regressor = regressor_cls(**kwargs)
        regressor.fit(train_df)
        print(regressor.predict(test_df))

# Predict with an ensemble built from fresh instances of the same three
# regressors, combined with the 'Average' combiner.
r1, r2, r3 = (
    OrdinaryLeastSquaresRegressor(**olsrArgs),
    OnlineGradientDescentRegressor(**ogdArgs),
    LightGbmRegressor(**lgbmArgs),
)
ensemble = VotingRegressor(estimators=[r1, r2, r3], combiner='Average')
pipeline = Pipeline([ensemble])

pipeline.fit(train_df)
result = pipeline.predict(test_df)
print(result)
###############################################################################
# LightGbmRegressor
import numpy as np
from nimbusml.datasets import get_dataset
from nimbusml.ensemble import LightGbmRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

# Build train and test data from the built-in 'airquality' data set:
#    Unnamed: 0  Ozone  Solar_R  Wind  Temp  Month  Day
# 0           1   41.0    190.0   7.4    67      5    1
# 1           2   36.0    118.0   8.0    72      5    2
np.random.seed(0)

airquality = get_dataset("airquality").as_df()
df = airquality.fillna(0)
# NOTE: fillna(0) has already replaced every missing value, so this filter
# keeps all rows.
df = df[df.Ozone.notnull()]

# Every column except 'Ozone' is a feature; 'Ozone' is the label.
feature_mask = df.columns != 'Ozone'
X_train, X_test, y_train, y_test = train_test_split(
    df.loc[:, feature_mask], df['Ozone'])

# Fit a regressor with default settings, then score the held-out rows.
regressor = LightGbmRegressor()
ftree = regressor.fit(X_train, y_train)
scores = ftree.predict(X_test)

# Report how well the predictions match the held-out labels.
r_squared = r2_score(y_test, scores)
print('R-squared fit:', r_squared)