Пример #1
0
 def test_add_predictors_target_not_in_sync_raise_ValueError(self):
     """Loading prediction files 1 and 2 together must raise ValueError.

     NOTE(review): going by the test name, the two fixture files presumably
     carry targets that do not match each other -- confirm against the data.
     """
     stacker = LinearPredictorStacker(metric=metric_rmse)
     # Paths are resolved outside the guarded call, so only the loader
     # itself is expected to raise.
     unsynced_files = [
         get_path(name)
         for name in ('noid_OOF_predictions_1.csv',
                      'noid_OOF_predictions_2.csv')
     ]
     with self.assertRaises(ValueError):
         stacker.add_predictors_by_filename(files=unsynced_files)
Пример #2
0
 def test_standard_fit_regression_stacker_no_bagging_free_weights(self):
     """Fit the STANDARD algorithm on MAE with a single bag and free weights.

     Weights are not normed (``normed_weights=False``) and no column or row
     sub-sampling is applied. After fitting, ``score`` and ``mean_score``
     are compared with recorded reference values to 4 decimal places.
     """
     settings = dict(
         metric=metric_mae,
         algorithm=LinearPredictorStacker.STANDARD,
         normed_weights=False,
         max_iter=200,
         n_bags=1,
         colsample=1.,
         subsample=1.,
         verbose=2,
         eps=1e-3,
         seed=0,
     )
     stacker = LinearPredictorStacker(**settings)
     oof_files = [
         get_path(name)
         for name in ('noid_OOF_predictions_2.csv',
                      'noid_OOF_predictions_3.csv',
                      'noid_OOF_predictions_4.csv')
     ]
     stacker.add_predictors_by_filename(files=oof_files)

     # Sanity-check what was loaded before fitting.
     for loaded in (stacker.target, stacker.predictors):
         self.assertEqual(len(loaded), 1000)
     self.assertEqual(stacker.predictors.shape[1], 22)

     stacker.fit()
     # The fitted weights are printed for inspection only; nothing is
     # asserted on them.
     print(stacker.get_weights())

     expected_score = 1185.0745095110819
     expected_mean_score = 1188.5725272161117
     self.assertAlmostEqual(stacker.score, expected_score, places=4)
     self.assertAlmostEqual(stacker.mean_score, expected_mean_score, places=4)
Пример #3
0
 def test_fit_swapping_regression_stacker_no_bagging_normed_weights(self):
     """Fit the SWAPPING algorithm on MAE with a single bag and normed weights.

     No column or row sub-sampling is applied. After fitting, ``score`` and
     ``mean_score`` are compared with recorded reference values to 4 decimal
     places.
     """
     settings = dict(
         metric=metric_mae,
         algorithm=LinearPredictorStacker.SWAPPING,
         normed_weights=True,
         max_iter=2000,
         n_bags=1,
         colsample=1.,
         subsample=1.,
         verbose=2,
         eps=1e-3,
         seed=11,
     )
     stacker = LinearPredictorStacker(**settings)
     oof_files = [
         get_path(name)
         for name in ('noid_OOF_predictions_2.csv',
                      'noid_OOF_predictions_3.csv',
                      'noid_OOF_predictions_4.csv')
     ]
     stacker.add_predictors_by_filename(files=oof_files)

     # Sanity-check what was loaded before fitting.
     for loaded in (stacker.target, stacker.predictors):
         self.assertEqual(len(loaded), 1000)
     self.assertEqual(stacker.predictors.shape[1], 22)

     stacker.fit()

     expected_score = 1156.3066
     expected_mean_score = 1188.5725272161117
     self.assertAlmostEqual(stacker.score, expected_score, places=4)
     self.assertAlmostEqual(stacker.mean_score, expected_mean_score, places=4)
Пример #4
0
 def test_init_stacker(self):
     """Check LinearPredictorStacker's attribute values after construction.

     Only ``metric`` is passed to the constructor; every other attribute is
     compared with the value this test expects it to hold by default.
     """
     stacker = LinearPredictorStacker(metric=metric_rmse)
     self.assertEqual(stacker.metric, metric_rmse)
     # Use identity checks for None: ``assertEqual(x, None)`` goes through
     # ``x == None``, which for an array-like attribute would be evaluated
     # element-wise and end in an error instead of a clean test failure.
     self.assertIsNone(stacker.predictors)
     self.assertIsNone(stacker.target)
     self.assertIsNone(stacker.weights)
     self.assertIsNone(stacker.score)
     self.assertEqual(stacker.maximize, False)
     self.assertEqual(stacker.algo, LinearPredictorStacker.STANDARD)
     self.assertEqual(stacker.max_predictors, 1.0)
     self.assertEqual(stacker.max_samples, 1.0)
     self.assertEqual(stacker.n_bags, 1)
     self.assertEqual(stacker.max_iter, 10)
     # NOTE(review): a check that ``stacker.step == 1`` was disabled here;
     # confirm whether that attribute still exists before re-enabling it.
     self.assertEqual(stacker.verbose, 0)
     self.assertEqual(stacker.verb_round, 1)
     self.assertEqual(stacker.normed_weights, True)
     self.assertEqual(stacker.eps, 1e-5)
     self.assertIsNone(stacker.seed)
Пример #5
0
 def test_fit_with_predictors_and_target_exception_tests(self):
     """Check that ``fit`` rejects invalid predictors/target with ValueError.

     The ``loss`` column of prediction file 2 is used as the target and the
     remaining columns as predictors. Four invalid combinations are tried,
     each of which must raise ValueError.
     """
     frame = pd.read_csv(get_path('noid_OOF_predictions_2.csv'))
     target = frame['loss']
     data = frame.drop(['loss'], axis=1)
     stacker = LinearPredictorStacker(
         metric=metric_rmse,
         algorithm=LinearPredictorStacker.SWAPPING,
         max_iter=20,
         n_bags=1,
         colsample=1.,
         subsample=1.,
         verbose=0)

     # 1) Predictors and target differ in length.
     truncated_target = target.head(100)
     with self.assertRaises(ValueError):
         stacker.fit(predictors=data, target=truncated_target)

     # 2) Predictors contain null values (every value below 1000 -> NaN).
     predictors_with_nan = data.mask(data < 1000)
     with self.assertRaises(ValueError):
         stacker.fit(predictors=predictors_with_nan, target=target)

     # 3) Target contains null values (every value below 1000 -> NaN).
     target_with_nan = target.mask(target < 1000)
     with self.assertRaises(ValueError):
         stacker.fit(predictors=data, target=target_with_nan)

     # 4) Target has more than one column (the predictor frame is passed).
     with self.assertRaises(ValueError):
         stacker.fit(predictors=data, target=data)
Пример #6
0
 def test_fit_regression_stacker_mae_ten_bags(self):
     """Fit on MAE with 20 bags and 80% column/row sub-sampling.

     NOTE(review): the method name says "ten bags" but the stacker is
     configured with ``n_bags=20``; confirm which one is intended.
     After fitting, ``score`` and ``mean_score`` are compared with recorded
     reference values to 4 decimal places.
     """
     settings = dict(
         metric=metric_mae,
         max_iter=10,
         n_bags=20,
         colsample=.8,
         subsample=.8,
         seed=24698537,
     )
     stacker = LinearPredictorStacker(**settings)
     oof_files = [
         get_path(name)
         for name in ('noid_OOF_predictions_2.csv',
                      'noid_OOF_predictions_3.csv',
                      'noid_OOF_predictions_4.csv')
     ]
     stacker.add_predictors_by_filename(files=oof_files)

     # Sanity-check what was loaded before fitting.
     for loaded in (stacker.target, stacker.predictors):
         self.assertEqual(len(loaded), 1000)
     self.assertEqual(stacker.predictors.shape[1], 22)

     stacker.fit()

     # An older version of this test expected a score of 1187.6537373418842.
     expected_score = 1179.2406088734808
     expected_mean_score = 1188.5725272161117
     self.assertAlmostEqual(stacker.score, expected_score, places=4)
     self.assertAlmostEqual(stacker.mean_score, expected_mean_score, places=4)
Пример #7
0
 def test_fit_regression_stacker_rmse_no_bagging_step_decrease(self):
     """Fit on RMSE with a single bag, no sub-sampling and 250 iterations.

     The algorithm is left at the constructor's default. After fitting,
     ``score`` and ``mean_score`` are compared with recorded reference
     values to 4 decimal places.
     """
     settings = dict(
         metric=metric_rmse,
         max_iter=250,
         n_bags=1,
         colsample=1.,
         subsample=1.,
         verbose=0,
     )
     stacker = LinearPredictorStacker(**settings)
     oof_files = [
         get_path(name)
         for name in ('noid_OOF_predictions_2.csv',
                      'noid_OOF_predictions_3.csv',
                      'noid_OOF_predictions_4.csv')
     ]
     stacker.add_predictors_by_filename(files=oof_files)

     # Sanity-check what was loaded before fitting.
     for loaded in (stacker.target, stacker.predictors):
         self.assertEqual(len(loaded), 1000)
     self.assertEqual(stacker.predictors.shape[1], 22)

     stacker.fit()

     # An older version of this test expected a score of 2030.5021340510675.
     expected_score = 1972.574584232116
     expected_mean_score = 2032.2110846499691
     self.assertAlmostEqual(stacker.score, expected_score, places=4)
     self.assertAlmostEqual(stacker.mean_score, expected_mean_score, places=4)
Пример #8
0
 def test_fit_regression_stacker_mae_no_bagging(self):
     """Fit on MAE with a single bag, no sub-sampling and 20 iterations.

     The algorithm is left at the constructor's default. After fitting,
     ``score`` and ``mean_score`` are compared with recorded reference
     values to 4 decimal places.
     """
     settings = dict(
         metric=metric_mae,
         max_iter=20,
         n_bags=1,
         colsample=1.,
         subsample=1.,
         verbose=0,
     )
     stacker = LinearPredictorStacker(**settings)
     oof_files = [
         get_path(name)
         for name in ('noid_OOF_predictions_2.csv',
                      'noid_OOF_predictions_3.csv',
                      'noid_OOF_predictions_4.csv')
     ]
     stacker.add_predictors_by_filename(files=oof_files)

     # Sanity-check what was loaded before fitting.
     for loaded in (stacker.target, stacker.predictors):
         self.assertEqual(len(loaded), 1000)
     self.assertEqual(stacker.predictors.shape[1], 22)

     stacker.fit()

     # An older version of this test expected a score of 1187.1916616561432.
     expected_score = 1176.295406
     expected_mean_score = 1188.5725272161117
     self.assertAlmostEqual(stacker.score, expected_score, places=4)
     self.assertAlmostEqual(stacker.mean_score, expected_mean_score, places=4)
Пример #9
0
 def test_swapping_fit_regression_stacker_rmse_no_bagging(self):
     """Fit the SWAPPING algorithm on RMSE with a single bag.

     No column or row sub-sampling is applied and 20 iterations are allowed.
     After fitting, ``score`` and ``mean_score`` are compared with recorded
     reference values to 4 decimal places.
     """
     settings = dict(
         metric=metric_rmse,
         algorithm=LinearPredictorStacker.SWAPPING,
         max_iter=20,
         n_bags=1,
         colsample=1.,
         subsample=1.,
         verbose=0,
     )
     stacker = LinearPredictorStacker(**settings)
     oof_files = [
         get_path(name)
         for name in ('noid_OOF_predictions_2.csv',
                      'noid_OOF_predictions_3.csv',
                      'noid_OOF_predictions_4.csv')
     ]
     stacker.add_predictors_by_filename(files=oof_files)

     # Sanity-check what was loaded before fitting.
     for loaded in (stacker.target, stacker.predictors):
         self.assertEqual(len(loaded), 1000)
     self.assertEqual(stacker.predictors.shape[1], 22)

     stacker.fit()

     expected_score = 2008.0897782026507
     expected_mean_score = 2032.2110846499691
     self.assertAlmostEqual(stacker.score, expected_score, places=4)
     self.assertAlmostEqual(stacker.mean_score, expected_mean_score, places=4)
Пример #10
0
 def test_regression_r2_score_maximization(self):
     """Fit the STANDARD algorithm with ``r2_score`` as a maximized metric.

     The stacker is built with ``maximize=True``, un-normed weights, a single
     bag and no sub-sampling. After fitting, ``score`` and ``mean_score`` are
     compared with recorded reference values to 4 decimal places.
     """
     settings = dict(
         metric=r2_score,
         maximize=True,
         algorithm=LinearPredictorStacker.STANDARD,
         normed_weights=False,
         max_iter=200,
         n_bags=1,
         colsample=1.,
         subsample=1.,
         verbose=2,
         eps=1e-4,
         seed=0,
     )
     stacker = LinearPredictorStacker(**settings)
     oof_files = [
         get_path(name)
         for name in ('noid_OOF_predictions_2.csv',
                      'noid_OOF_predictions_3.csv',
                      'noid_OOF_predictions_4.csv')
     ]
     stacker.add_predictors_by_filename(files=oof_files)

     # Sanity-check what was loaded before fitting.
     for loaded in (stacker.target, stacker.predictors):
         self.assertEqual(len(loaded), 1000)
     self.assertEqual(stacker.predictors.shape[1], 22)

     stacker.fit()

     expected_score = 0.5478882
     expected_mean_score = 0.537271112149033
     self.assertAlmostEqual(stacker.score, expected_score, places=4)
     self.assertAlmostEqual(stacker.mean_score, expected_mean_score, places=4)
Пример #11
0
 def test_add_predictors_not_a_string_raise_TypeError(self):
     """Passing integers instead of file names must raise TypeError."""
     stacker = LinearPredictorStacker(metric=metric_rmse)
     not_file_names = [1, 2]
     with self.assertRaises(TypeError):
         stacker.add_predictors_by_filename(files=not_file_names)
Пример #12
0
 def test_add_predictors_file_error_raise_ValueError(self):
     """Loading ``does_not_exist.csv`` must raise ValueError."""
     stacker = LinearPredictorStacker(metric=metric_rmse)
     # Resolve the path outside the guarded call so that only the loader
     # itself is expected to raise.
     missing_file = get_path('does_not_exist.csv')
     with self.assertRaises(ValueError):
         stacker.add_predictors_by_filename(files=[missing_file])
Пример #13
0
 def test_fit_without_predictors_raise_ValueError(self):
     """Calling ``fit`` before any predictors are added must raise ValueError."""
     empty_stacker = LinearPredictorStacker(metric=metric_rmse)
     with self.assertRaises(ValueError):
         empty_stacker.fit()