def test_ensemble_supports_user_defined_transforms(self):
    """Check that a VotingRegressor can follow a user-supplied transform.

    The test frame is ``test_df`` plus two extra rows (``c1`` = 9 and 11).
    Each learner is first trained and scored on its own; then fresh,
    unfitted learners are combined in a pipeline that applies
    ``RangeFilter(min=0, max=10, columns='c1')`` ahead of an averaging
    ``VotingRegressor``.  The pipeline must return three rows whose
    ``'Score'`` values match, to 5 decimal places, the mean of the
    stand-alone predictions at positions 0, 1 and 2.
    """
    # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
    # pd.concat builds the same frame (original index labels are kept) and
    # returns a new object, so test_df itself is left untouched.
    extra_rows = pd.DataFrame({'c1': [9, 11], 'c2': [1, 1]})
    test2_df = pd.concat([test_df, extra_rows])

    # Reference predictions from each learner trained individually.
    r1 = OrdinaryLeastSquaresRegressor(**olsrArgs)
    r1.fit(train_df)
    result1 = r1.predict(test2_df)

    r2 = OnlineGradientDescentRegressor(**ogdArgs)
    r2.fit(train_df)
    result2 = r2.predict(test2_df)

    r3 = LightGbmRegressor(**lgbmArgs)
    r3.fit(train_df)
    result3 = r3.predict(test2_df)

    # The ensemble gets new, unfitted instances of the same learners.
    r1 = OrdinaryLeastSquaresRegressor(**olsrArgs)
    r2 = OnlineGradientDescentRegressor(**ogdArgs)
    r3 = LightGbmRegressor(**lgbmArgs)

    pipeline = Pipeline([
        RangeFilter(min=0, max=10, columns='c1'),
        VotingRegressor(estimators=[r1, r2, r3], combiner='Average')
    ])
    pipeline.fit(train_df)
    result4 = pipeline.predict(test2_df)

    # NOTE(review): presumably the appended c1 == 11 row is the one the
    # filter removes, leaving three rows -- this depends on the size of
    # test_df, which is defined elsewhere; confirm.
    self.assertEqual(len(result4), 3)

    average1 = (result1[0] + result2[0] + result3[0]) / 3
    average2 = (result1[1] + result2[1] + result3[1]) / 3
    average3 = (result1[2] + result2[2] + result3[2]) / 3

    self.assertAlmostEqual(average1, result4.loc[0, 'Score'], places=5)
    self.assertAlmostEqual(average2, result4.loc[1, 'Score'], places=5)
    self.assertAlmostEqual(average3, result4.loc[2, 'Score'], places=5)
def test_lightgbmregressor(self):
    """Fit a default LightGbmRegressor on 'airquality' and check its R-squared.

    The frame is loaded with missing values replaced by 0, split with
    ``train_test_split`` after seeding NumPy's global RNG with 0, and the
    R-squared of the predictions for the 'Ozone' column must fall strictly
    between 0.32 and 0.33.
    """
    lower_bound = 0.32
    upper_bound = 0.33

    np.random.seed(0)
    airquality = get_dataset("airquality").as_df().fillna(0)
    # NOTE(review): fillna(0) has already run, so this filter looks like it
    # keeps every row -- confirm whether it was meant to run first.
    airquality = airquality[airquality.Ozone.notnull()]

    is_feature = airquality.columns != 'Ozone'
    features = airquality.loc[:, is_feature]
    target = airquality['Ozone']
    X_train, X_test, y_train, y_test = train_test_split(features, target)

    # Train a model and score
    model = LightGbmRegressor().fit(X_train, y_train)
    predictions = model.predict(X_test)

    r_squared = r2_score(y_test, predictions)
    assert_greater(r_squared, lower_bound,
                   "should be greater than %s" % lower_bound)
    assert_less(r_squared, upper_bound,
                "sum should be less than %s" % upper_bound)
def test_ensemble_with_average_and_median_combiner(self):
    """Compare VotingRegressor output with hand-computed combinations.

    Each learner is trained and scored on its own first.  A pipeline with
    the 'Average' combiner must then reproduce the mean of those three
    predictions (to 5 decimal places) for rows 0 and 1, and a pipeline
    with the 'Median' combiner must reproduce their middle value exactly.
    """
    # Stand-alone reference predictions, one learner at a time.
    ols = OrdinaryLeastSquaresRegressor(**olsrArgs)
    ols.fit(train_df)
    result1 = ols.predict(test_df)

    ogd = OnlineGradientDescentRegressor(**ogdArgs)
    ogd.fit(train_df)
    result2 = ogd.predict(test_df)

    lgbm = LightGbmRegressor(**lgbmArgs)
    lgbm.fit(train_df)
    result3 = lgbm.predict(test_df)

    checked_rows = (0, 1)

    # --- 'Average' combiner, built from fresh unfitted learners ---
    average_pipeline = Pipeline([
        VotingRegressor(
            estimators=[
                OrdinaryLeastSquaresRegressor(**olsrArgs),
                OnlineGradientDescentRegressor(**ogdArgs),
                LightGbmRegressor(**lgbmArgs),
            ],
            combiner='Average')
    ])
    average_pipeline.fit(train_df)
    averaged = average_pipeline.predict(test_df)

    expected_means = [
        (result1[row] + result2[row] + result3[row]) / 3
        for row in checked_rows
    ]
    for row, expected in zip(checked_rows, expected_means):
        self.assertAlmostEqual(expected, averaged.loc[row, 'Score'], places=5)

    # --- 'Median' combiner, again from fresh unfitted learners ---
    median_pipeline = Pipeline([
        VotingRegressor(
            estimators=[
                OrdinaryLeastSquaresRegressor(**olsrArgs),
                OnlineGradientDescentRegressor(**ogdArgs),
                LightGbmRegressor(**lgbmArgs),
            ],
            combiner='Median')
    ])
    median_pipeline.fit(train_df)
    medians = median_pipeline.predict(test_df)

    # With three values, the median is the middle element once sorted.
    expected_medians = [
        sorted([result1.loc[row], result2.loc[row], result3.loc[row]])[1]
        for row in checked_rows
    ]
    for row, expected in zip(checked_rows, expected_medians):
        self.assertEqual(expected, medians.loc[row, 'Score'])
}

# When show_individual_predictions is truthy, fit each learner on its own
# and print its predictions for test_df.
# NOTE(review): the extent of this `if` body is inferred from the comment
# that follows it; the collapsed layout does not show it -- confirm.
if show_individual_predictions:
    r1 = OrdinaryLeastSquaresRegressor(**olsrArgs)
    r1.fit(train_df)
    result = r1.predict(test_df)
    print(result)

    r2 = OnlineGradientDescentRegressor(**ogdArgs)
    r2.fit(train_df)
    result = r2.predict(test_df)
    print(result)

    r3 = LightGbmRegressor(**lgbmArgs)
    r3.fit(train_df)
    result = r3.predict(test_df)
    print(result)

# Perform a prediction using an ensemble
# of all three of the above predictors.

# New, unfitted instances are created for the ensemble.
r1 = OrdinaryLeastSquaresRegressor(**olsrArgs)
r2 = OnlineGradientDescentRegressor(**ogdArgs)
r3 = LightGbmRegressor(**lgbmArgs)

pipeline = Pipeline(
    [VotingRegressor(estimators=[r1, r2, r3], combiner='Average')])
pipeline.fit(train_df)
result = pipeline.predict(test_df)

print(result)
###############################################################################
# LightGbmRegressor
import numpy as np
from nimbusml.datasets import get_dataset
from nimbusml.ensemble import LightGbmRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

# Build train and test sets from the built-in 'airquality' data set.
# Sample rows:
#    Unnamed: 0  Ozone  Solar_R  Wind  Temp  Month  Day
# 0           1   41.0    190.0   7.4    67      5    1
# 1           2   36.0    118.0   8.0    72      5    2
np.random.seed(0)

airquality = get_dataset("airquality").as_df().fillna(0)
# NOTE(review): fillna(0) has already run, so this filter looks like it keeps
# every row -- confirm whether it was meant to run first.
airquality = airquality[airquality.Ozone.notnull()]

# 'Ozone' is the label; every other column is a feature.
features = airquality.loc[:, airquality.columns != 'Ozone']
label = airquality['Ozone']
X_train, X_test, y_train, y_test = train_test_split(features, label)

# Fit a regressor with default settings and score the held-out rows.
regressor = LightGbmRegressor().fit(X_train, y_train)
predictions = regressor.predict(X_test)

# Report how well the predictions match the held-out labels.
print('R-squared fit:', r2_score(y_test, predictions))